xref: /petsc/src/vec/is/ao/impls/memscalable/aomemscalable.c (revision 82c86c8f8a181c1295b98e018a5a0771744eccf1)
1 
2 /*
3     The memory scalable AO application ordering routines. These store the
4   local orderings on each processor.
5 */
6 
7 #include <../src/vec/is/ao/aoimpl.h>          /*I  "petscao.h"   I*/
8 
/* Per-object data for the AOMEMORYSCALABLE type: each process stores only the
   slice of the two orderings that the layout assigns to it (memory scalable,
   unlike AOBASIC which replicates the whole ordering on every process). */
typedef struct {
  PetscInt    *app_loc;    /* app_loc[i] is the partner for the ith local PETSc slot */
  PetscInt    *petsc_loc;  /* petsc_loc[j] is the partner for the jth local app slot */
  PetscLayout map;         /* determines the local sizes of ao */
} AO_MemoryScalable;
14 
/*
   The ordering data is distributed (each process stores only its own slice),
   so process 0 receives every other process's slice and does all the
   printing; the other processes only send their slices.
*/
#undef __FUNCT__
#define __FUNCT__ "AOView_MemoryScalable"
PetscErrorCode AOView_MemoryScalable(AO ao,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank,size;
  AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
  PetscBool         iascii;
  PetscMPIInt       tag_app,tag_petsc;
  PetscLayout       map = aomems->map;
  PetscInt          *app,*app_loc,*petsc,*petsc_loc,len,i,j;
  MPI_Status        status;

  PetscFunctionBegin;
  /* only ASCII viewers are supported for this AO type */
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (!iascii) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not supported for AO MemoryScalable",((PetscObject)viewer)->type_name);

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)ao),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)ao),&size);CHKERRQ(ierr);

  /* two distinct tags so the app and petsc messages from one source cannot be confused */
  ierr = PetscObjectGetNewTag((PetscObject)ao,&tag_app);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)ao,&tag_petsc);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscViewerASCIIPrintf(viewer,"Number of elements in ordering %D\n",ao->N);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,  "PETSc->App  App->PETSc\n");CHKERRQ(ierr);

    /* workspace indexed by the global ranges; large enough for every remote slice */
    ierr = PetscMalloc2(map->N,PetscInt,&app,map->N,PetscInt,&petsc);CHKERRQ(ierr);
    len  = map->n;
    /* print local AO */
    ierr = PetscViewerASCIIPrintf(viewer,"Process [%D]\n",rank);CHKERRQ(ierr);
    for (i=0; i<len; i++) {
      ierr = PetscViewerASCIIPrintf(viewer,"%3D  %3D    %3D  %3D\n",i,aomems->app_loc[i],i,aomems->petsc_loc[i]);CHKERRQ(ierr);
    }

    /* recv and print off-processor's AO */
    for (i=1; i<size; i++) {
      len       = map->range[i+1] - map->range[i];   /* number of entries owned by process i */
      app_loc   = app  + map->range[i];
      petsc_loc = petsc+ map->range[i];
      ierr      = MPI_Recv(app_loc,(PetscMPIInt)len,MPIU_INT,i,tag_app,PetscObjectComm((PetscObject)ao),&status);CHKERRQ(ierr);
      ierr      = MPI_Recv(petsc_loc,(PetscMPIInt)len,MPIU_INT,i,tag_petsc,PetscObjectComm((PetscObject)ao),&status);CHKERRQ(ierr);
      ierr      = PetscViewerASCIIPrintf(viewer,"Process [%D]\n",i);CHKERRQ(ierr);
      for (j=0; j<len; j++) {
        ierr = PetscViewerASCIIPrintf(viewer,"%3D  %3D    %3D  %3D\n",map->range[i]+j,app_loc[j],map->range[i]+j,petsc_loc[j]);CHKERRQ(ierr);
      }
    }
    ierr = PetscFree2(app,petsc);CHKERRQ(ierr);

  } else {
    /* send values; blocking sends are matched in rank order by the receives above */
    ierr = MPI_Send((void*)aomems->app_loc,map->n,MPIU_INT,0,tag_app,PetscObjectComm((PetscObject)ao));CHKERRQ(ierr);
    ierr = MPI_Send((void*)aomems->petsc_loc,map->n,MPIU_INT,0,tag_petsc,PetscObjectComm((PetscObject)ao));CHKERRQ(ierr);
  }
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
75 
76 #undef __FUNCT__
77 #define __FUNCT__ "AODestroy_MemoryScalable"
78 PetscErrorCode AODestroy_MemoryScalable(AO ao)
79 {
80   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
81   PetscErrorCode    ierr;
82 
83   PetscFunctionBegin;
84   ierr = PetscFree2(aomems->app_loc,aomems->petsc_loc);CHKERRQ(ierr);
85   ierr = PetscLayoutDestroy(&aomems->map);CHKERRQ(ierr);
86   ierr = PetscFree(aomems);CHKERRQ(ierr);
87   PetscFunctionReturn(0);
88 }
89 
90 /*
91    Input Parameters:
92 +   ao - the application ordering context
93 .   n  - the number of integers in ia[]
94 .   ia - the integers; these are replaced with their mapped value
95 -   maploc - app_loc or petsc_loc in struct "AO_MemoryScalable"
96 
97    Output Parameter:
98 .   ia - the mapped interges
99  */
100 #undef __FUNCT__
101 #define __FUNCT__ "AOMap_MemoryScalable_private"
102 PetscErrorCode AOMap_MemoryScalable_private(AO ao,PetscInt n,PetscInt *ia,PetscInt *maploc)
103 {
104   PetscErrorCode    ierr;
105   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
106   MPI_Comm          comm;
107   PetscMPIInt       rank,size,tag1,tag2;
108   PetscInt          *owner,*start,*nprocs,nsends,nreceives;
109   PetscInt          nmax,count,*sindices,*rindices,i,j,idx,lastidx,*sindices2,*rindices2;
110   PetscInt          *owners = aomems->map->range;
111   MPI_Request       *send_waits,*recv_waits,*send_waits2,*recv_waits2;
112   MPI_Status        recv_status;
113   PetscMPIInt       nindices,source,widx;
114   PetscInt          *rbuf,*sbuf;
115   MPI_Status        *send_status,*send_status2;
116 
117   PetscFunctionBegin;
118   ierr = PetscObjectGetComm((PetscObject)ao,&comm);CHKERRQ(ierr);
119   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
120   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
121 
122   /*  first count number of contributors to each processor */
123   ierr = PetscMalloc2(2*size,PetscInt,&nprocs,size,PetscInt,&start);CHKERRQ(ierr);
124   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
125   ierr = PetscMalloc(n*sizeof(PetscInt),&owner);CHKERRQ(ierr);
126   ierr = PetscMemzero(owner,n*sizeof(PetscInt));CHKERRQ(ierr);
127 
128   j       = 0;
129   lastidx = -1;
130   for (i=0; i<n; i++) {
131     /* if indices are NOT locally sorted, need to start search at the beginning */
132     if (lastidx > (idx = ia[i])) j = 0;
133     lastidx = idx;
134     for (; j<size; j++) {
135       if (idx >= owners[j] && idx < owners[j+1]) {
136         nprocs[2*j]++;     /* num of indices to be sent */
137         nprocs[2*j+1] = 1; /* send to proc[j] */
138         owner[i]      = j;
139         break;
140       }
141     }
142   }
143   nprocs[2*rank]=nprocs[2*rank+1]=0; /* do not receive from self! */
144   nsends        = 0;
145   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
146 
147   /* inform other processors of number of messages and max length*/
148   ierr = PetscMaxSum(comm,nprocs,&nmax,&nreceives);CHKERRQ(ierr);
149 
150   /* allocate arrays */
151   ierr = PetscObjectGetNewTag((PetscObject)ao,&tag1);CHKERRQ(ierr);
152   ierr = PetscObjectGetNewTag((PetscObject)ao,&tag2);CHKERRQ(ierr);
153 
154   ierr = PetscMalloc2(nreceives*nmax,PetscInt,&rindices,nreceives,MPI_Request,&recv_waits);CHKERRQ(ierr);
155   ierr = PetscMalloc2(nsends*nmax,PetscInt,&rindices2,nsends,MPI_Request,&recv_waits2);CHKERRQ(ierr);
156 
157   ierr = PetscMalloc3(n,PetscInt,&sindices,nsends,MPI_Request,&send_waits,nsends,MPI_Status,&send_status);CHKERRQ(ierr);
158   ierr = PetscMalloc3(n,PetscInt,&sindices2,nreceives,MPI_Request,&send_waits2,nreceives,MPI_Status,&send_status2);CHKERRQ(ierr);
159 
160   /* post 1st receives: receive others requests
161      since we don't know how long each individual message is we
162      allocate the largest needed buffer for each receive. Potentially
163      this is a lot of wasted space.
164   */
165   for (i=0,count=0; i<nreceives; i++) {
166     ierr = MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);CHKERRQ(ierr);
167   }
168 
169   /* do 1st sends:
170       1) starts[i] gives the starting index in svalues for stuff going to
171          the ith processor
172   */
173   start[0] = 0;
174   for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2];
175   for (i=0; i<n; i++) {
176     j = owner[i];
177     if (j != rank) {
178       sindices[start[j]++]  = ia[i];
179     } else { /* compute my own map */
180       if (ia[i] >= owners[rank] && ia[i] < owners[rank+1]) {
181         ia[i] = maploc[ia[i]-owners[rank]];
182       } else {
183         ia[i] = -1;  /* ia[i] is not in the range of 0 and N-1, maps it to -1 */
184       }
185     }
186   }
187 
188   start[0] = 0;
189   for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2];
190   for (i=0,count=0; i<size; i++) {
191     if (nprocs[2*i+1]) {
192       /* send my request to others */
193       ierr = MPI_Isend(sindices+start[i],nprocs[2*i],MPIU_INT,i,tag1,comm,send_waits+count);CHKERRQ(ierr);
194       /* post receive for the answer of my request */
195       ierr = MPI_Irecv(sindices2+start[i],nprocs[2*i],MPIU_INT,i,tag2,comm,recv_waits2+count);CHKERRQ(ierr);
196       count++;
197     }
198   }
199   if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %d != count %d",nsends,count);
200 
201   /* wait on 1st sends */
202   if (nsends) {
203     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
204   }
205 
206   /* 1st recvs: other's requests */
207   for (j=0; j< nreceives; j++) {
208     ierr   = MPI_Waitany(nreceives,recv_waits,&widx,&recv_status);CHKERRQ(ierr); /* idx: index of handle for operation that completed */
209     ierr   = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRQ(ierr);
210     rbuf   = rindices+nmax*widx; /* global index */
211     source = recv_status.MPI_SOURCE;
212 
213     /* compute mapping */
214     sbuf = rbuf;
215     for (i=0; i<nindices; i++) sbuf[i] = maploc[rbuf[i]-owners[rank]];
216 
217     /* send mapping back to the sender */
218     ierr = MPI_Isend(sbuf,nindices,MPIU_INT,source,tag2,comm,send_waits2+widx);CHKERRQ(ierr);
219   }
220 
221   /* wait on 2nd sends */
222   if (nreceives) {
223     ierr = MPI_Waitall(nreceives,send_waits2,send_status2);CHKERRQ(ierr);
224   }
225 
226   /* 2nd recvs: for the answer of my request */
227   for (j=0; j< nsends; j++) {
228     ierr   = MPI_Waitany(nsends,recv_waits2,&widx,&recv_status);CHKERRQ(ierr);
229     ierr   = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRQ(ierr);
230     source = recv_status.MPI_SOURCE;
231     /* pack output ia[] */
232     rbuf  = sindices2+start[source];
233     count = 0;
234     for (i=0; i<n; i++) {
235       if (source == owner[i]) ia[i] = rbuf[count++];
236     }
237   }
238 
239   /* free arrays */
240   ierr = PetscFree2(nprocs,start);CHKERRQ(ierr);
241   ierr = PetscFree(owner);CHKERRQ(ierr);
242   ierr = PetscFree2(rindices,recv_waits);CHKERRQ(ierr);
243   ierr = PetscFree2(rindices2,recv_waits2);CHKERRQ(ierr);
244   ierr = PetscFree3(sindices,send_waits,send_status);CHKERRQ(ierr);
245   ierr = PetscFree3(sindices2,send_waits2,send_status2);CHKERRQ(ierr);
246   PetscFunctionReturn(0);
247 }
248 
249 #undef __FUNCT__
250 #define __FUNCT__ "AOPetscToApplication_MemoryScalable"
251 PetscErrorCode AOPetscToApplication_MemoryScalable(AO ao,PetscInt n,PetscInt *ia)
252 {
253   PetscErrorCode    ierr;
254   AO_MemoryScalable *aomems  = (AO_MemoryScalable*)ao->data;
255   PetscInt          *app_loc = aomems->app_loc;
256 
257   PetscFunctionBegin;
258   ierr = AOMap_MemoryScalable_private(ao,n,ia,app_loc);CHKERRQ(ierr);
259   PetscFunctionReturn(0);
260 }
261 
262 #undef __FUNCT__
263 #define __FUNCT__ "AOApplicationToPetsc_MemoryScalable"
264 PetscErrorCode AOApplicationToPetsc_MemoryScalable(AO ao,PetscInt n,PetscInt *ia)
265 {
266   PetscErrorCode    ierr;
267   AO_MemoryScalable *aomems    = (AO_MemoryScalable*)ao->data;
268   PetscInt          *petsc_loc = aomems->petsc_loc;
269 
270   PetscFunctionBegin;
271   ierr = AOMap_MemoryScalable_private(ao,n,ia,petsc_loc);CHKERRQ(ierr);
272   PetscFunctionReturn(0);
273 }
274 
/* Virtual function table installed on every AOMEMORYSCALABLE object by
   AOCreate_MemoryScalable(); the trailing zero slots are operations this
   type does not implement (see struct _AOOps in aoimpl.h for slot names). */
static struct _AOOps AOOps_MemoryScalable = {
  AOView_MemoryScalable,
  AODestroy_MemoryScalable,
  AOPetscToApplication_MemoryScalable,
  AOApplicationToPetsc_MemoryScalable,
  0,
  0,
  0,
  0
};
285 
286 #undef __FUNCT__
287 #define __FUNCT__ "AOCreateMemoryScalable_private"
288 PetscErrorCode  AOCreateMemoryScalable_private(MPI_Comm comm,PetscInt napp,const PetscInt from_array[],const PetscInt to_array[],AO ao, PetscInt *aomap_loc)
289 {
290   PetscErrorCode    ierr;
291   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
292   PetscLayout       map     = aomems->map;
293   PetscInt          n_local = map->n,i,j;
294   PetscMPIInt       rank,size,tag;
295   PetscInt          *owner,*start,*nprocs,nsends,nreceives;
296   PetscInt          nmax,count,*sindices,*rindices,idx,lastidx;
297   PetscInt          *owners = aomems->map->range;
298   MPI_Request       *send_waits,*recv_waits;
299   MPI_Status        recv_status;
300   PetscMPIInt       nindices,widx;
301   PetscInt          *rbuf;
302   PetscInt          n=napp,ip,ia;
303   MPI_Status        *send_status;
304 
305   PetscFunctionBegin;
306   ierr = PetscMemzero(aomap_loc,n_local*sizeof(PetscInt));CHKERRQ(ierr);
307 
308   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
309   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
310 
311   /*  first count number of contributors (of from_array[]) to each processor */
312   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
313   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
314   ierr = PetscMalloc(n*sizeof(PetscInt),&owner);CHKERRQ(ierr);
315 
316   j       = 0;
317   lastidx = -1;
318   for (i=0; i<n; i++) {
319     /* if indices are NOT locally sorted, need to start search at the beginning */
320     if (lastidx > (idx = from_array[i])) j = 0;
321     lastidx = idx;
322     for (; j<size; j++) {
323       if (idx >= owners[j] && idx < owners[j+1]) {
324         nprocs[2*j]  += 2; /* num of indices to be sent - in pairs (ip,ia) */
325         nprocs[2*j+1] = 1; /* send to proc[j] */
326         owner[i]      = j;
327         break;
328       }
329     }
330   }
331   nprocs[2*rank]=nprocs[2*rank+1]=0; /* do not receive from self! */
332   nsends        = 0;
333   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
334 
335   /* inform other processors of number of messages and max length*/
336   ierr = PetscMaxSum(comm,nprocs,&nmax,&nreceives);CHKERRQ(ierr);
337 
338   /* allocate arrays */
339   ierr = PetscObjectGetNewTag((PetscObject)ao,&tag);CHKERRQ(ierr);
340   ierr = PetscMalloc2(nreceives*nmax,PetscInt,&rindices,nreceives,MPI_Request,&recv_waits);CHKERRQ(ierr);
341   ierr = PetscMalloc3(2*n,PetscInt,&sindices,nsends,MPI_Request,&send_waits,nsends,MPI_Status,&send_status);CHKERRQ(ierr);
342   ierr = PetscMalloc(size*sizeof(PetscInt),&start);CHKERRQ(ierr);
343 
344   /* post receives: */
345   for (i=0; i<nreceives; i++) {
346     ierr = MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
347   }
348 
349   /* do sends:
350       1) starts[i] gives the starting index in svalues for stuff going to
351          the ith processor
352   */
353   start[0] = 0;
354   for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2];
355   for (i=0; i<n; i++) {
356     j = owner[i];
357     if (j != rank) {
358       ip                   = from_array[i];
359       ia                   = to_array[i];
360       sindices[start[j]++] = ip;
361       sindices[start[j]++] = ia;
362     } else { /* compute my own map */
363       ip            = from_array[i] - owners[rank];
364       ia            = to_array[i];
365       aomap_loc[ip] = ia;
366     }
367   }
368 
369   start[0] = 0;
370   for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2];
371   for (i=0,count=0; i<size; i++) {
372     if (nprocs[2*i+1]) {
373       ierr = MPI_Isend(sindices+start[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count);CHKERRQ(ierr);
374       count++;
375     }
376   }
377   if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %d != count %d",nsends,count);
378 
379   /* wait on sends */
380   if (nsends) {
381     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
382   }
383 
384   /* recvs */
385   count=0;
386   for (j= nreceives; j>0; j--) {
387     ierr = MPI_Waitany(nreceives,recv_waits,&widx,&recv_status);CHKERRQ(ierr);
388     ierr = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRQ(ierr);
389     rbuf = rindices+nmax*widx; /* global index */
390 
391     /* compute local mapping */
392     for (i=0; i<nindices; i+=2) { /* pack aomap_loc */
393       ip            = rbuf[i] - owners[rank]; /* local index */
394       ia            = rbuf[i+1];
395       aomap_loc[ip] = ia;
396     }
397     count++;
398   }
399 
400   ierr = PetscFree(start);CHKERRQ(ierr);
401   ierr = PetscFree3(sindices,send_waits,send_status);CHKERRQ(ierr);
402   ierr = PetscFree2(rindices,recv_waits);CHKERRQ(ierr);
403   ierr = PetscFree(owner);CHKERRQ(ierr);
404   ierr = PetscFree(nprocs);CHKERRQ(ierr);
405   PetscFunctionReturn(0);
406 }
407 
408 EXTERN_C_BEGIN
409 #undef __FUNCT__
410 #define __FUNCT__ "AOCreate_MemoryScalable"
411 PetscErrorCode AOCreate_MemoryScalable(AO ao)
412 {
413   PetscErrorCode    ierr;
414   IS                isapp=ao->isapp,ispetsc=ao->ispetsc;
415   const PetscInt    *mypetsc,*myapp;
416   PetscInt          napp,n_local,N,i,start,*petsc,*lens,*disp;
417   MPI_Comm          comm;
418   AO_MemoryScalable *aomems;
419   PetscLayout       map;
420   PetscMPIInt       size,rank;
421 
422   PetscFunctionBegin;
423   /* create special struct aomems */
424   ierr     = PetscNewLog(ao, AO_MemoryScalable, &aomems);CHKERRQ(ierr);
425   ao->data = (void*) aomems;
426   ierr     = PetscMemcpy(ao->ops,&AOOps_MemoryScalable,sizeof(struct _AOOps));CHKERRQ(ierr);
427   ierr     = PetscObjectChangeTypeName((PetscObject)ao,AOMEMORYSCALABLE);CHKERRQ(ierr);
428 
429   /* transmit all local lengths of isapp to all processors */
430   ierr = PetscObjectGetComm((PetscObject)isapp,&comm);CHKERRQ(ierr);
431   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
432   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
433   ierr = PetscMalloc2(size,PetscInt,&lens,size,PetscInt,&disp);CHKERRQ(ierr);
434   ierr = ISGetLocalSize(isapp,&napp);CHKERRQ(ierr);
435   ierr = MPI_Allgather(&napp, 1, MPIU_INT, lens, 1, MPIU_INT, comm);CHKERRQ(ierr);
436 
437   N = 0;
438   for (i = 0; i < size; i++) {
439     disp[i] = N;
440     N      += lens[i];
441   }
442 
443   /* If ispetsc is 0 then use "natural" numbering */
444   if (napp) {
445     if (!ispetsc) {
446       start = disp[rank];
447       ierr  = PetscMalloc((napp+1) * sizeof(PetscInt), &petsc);CHKERRQ(ierr);
448       for (i=0; i<napp; i++) petsc[i] = start + i;
449     } else {
450       ierr  = ISGetIndices(ispetsc,&mypetsc);CHKERRQ(ierr);
451       petsc = (PetscInt*)mypetsc;
452     }
453   }
454 
455   /* create a map with global size N - used to determine the local sizes of ao - shall we use local napp instead of N? */
456   ierr    = PetscLayoutCreate(comm,&map);CHKERRQ(ierr);
457   map->bs = 1;
458   map->N  = N;
459   ierr    = PetscLayoutSetUp(map);CHKERRQ(ierr);
460 
461   ao->N       = N;
462   ao->n       = map->n;
463   aomems->map = map;
464 
465   /* create distributed indices app_loc: petsc->app and petsc_loc: app->petsc */
466   n_local = map->n;
467   ierr    = PetscMalloc2(n_local,PetscInt, &aomems->app_loc,n_local,PetscInt,&aomems->petsc_loc);CHKERRQ(ierr);
468   ierr    = PetscLogObjectMemory(ao,2*n_local*sizeof(PetscInt));CHKERRQ(ierr);
469   ierr    = PetscMemzero(aomems->app_loc,n_local*sizeof(PetscInt));CHKERRQ(ierr);
470   ierr    = PetscMemzero(aomems->petsc_loc,n_local*sizeof(PetscInt));CHKERRQ(ierr);
471   ierr    = ISGetIndices(isapp,&myapp);CHKERRQ(ierr);
472 
473   ierr = AOCreateMemoryScalable_private(comm,napp,petsc,myapp,ao,aomems->app_loc);CHKERRQ(ierr);
474   ierr = AOCreateMemoryScalable_private(comm,napp,myapp,petsc,ao,aomems->petsc_loc);CHKERRQ(ierr);
475 
476   ierr = ISRestoreIndices(isapp,&myapp);CHKERRQ(ierr);
477   if (napp) {
478     if (ispetsc) {
479       ierr = ISRestoreIndices(ispetsc,&mypetsc);CHKERRQ(ierr);
480     } else {
481       ierr = PetscFree(petsc);CHKERRQ(ierr);
482     }
483   }
484   ierr = PetscFree2(lens,disp);CHKERRQ(ierr);
485   PetscFunctionReturn(0);
486 }
487 EXTERN_C_END
488 
489 #undef __FUNCT__
490 #define __FUNCT__ "AOCreateMemoryScalable"
491 /*@C
492    AOCreateMemoryScalable - Creates a memory scalable application ordering using two integer arrays.
493 
494    Collective on MPI_Comm
495 
496    Input Parameters:
497 +  comm - MPI communicator that is to share AO
498 .  napp - size of integer arrays
499 .  myapp - integer array that defines an ordering
500 -  mypetsc - integer array that defines another ordering (may be NULL to
501              indicate the natural ordering, that is 0,1,2,3,...)
502 
503    Output Parameter:
504 .  aoout - the new application ordering
505 
506    Level: beginner
507 
508     Notes: The arrays myapp and mypetsc must contain the all the integers 0 to napp-1 with no duplicates; that is there cannot be any "holes"
509            in the indices. Use AOCreateMapping() or AOCreateMappingIS() if you wish to have "holes" in the indices.
510            Comparing with AOCreateBasic(), this routine trades memory with message communication.
511 
512 .keywords: AO, create
513 
514 .seealso: AOCreateMemoryScalableIS(), AODestroy(), AOPetscToApplication(), AOApplicationToPetsc()
515 @*/
516 PetscErrorCode AOCreateMemoryScalable(MPI_Comm comm,PetscInt napp,const PetscInt myapp[],const PetscInt mypetsc[],AO *aoout)
517 {
518   PetscErrorCode ierr;
519   IS             isapp,ispetsc;
520   const PetscInt *app=myapp,*petsc=mypetsc;
521 
522   PetscFunctionBegin;
523   ierr = ISCreateGeneral(comm,napp,app,PETSC_USE_POINTER,&isapp);CHKERRQ(ierr);
524   if (mypetsc) {
525     ierr = ISCreateGeneral(comm,napp,petsc,PETSC_USE_POINTER,&ispetsc);CHKERRQ(ierr);
526   } else {
527     ispetsc = NULL;
528   }
529   ierr = AOCreateMemoryScalableIS(isapp,ispetsc,aoout);CHKERRQ(ierr);
530   ierr = ISDestroy(&isapp);CHKERRQ(ierr);
531   if (mypetsc) {
532     ierr = ISDestroy(&ispetsc);CHKERRQ(ierr);
533   }
534   PetscFunctionReturn(0);
535 }
536 
537 #undef __FUNCT__
538 #define __FUNCT__ "AOCreateMemoryScalableIS"
539 /*@C
540    AOCreateMemoryScalableIS - Creates a memory scalable application ordering using two index sets.
541 
542    Collective on IS
543 
544    Input Parameters:
545 +  isapp - index set that defines an ordering
546 -  ispetsc - index set that defines another ordering (may be NULL to use the
547              natural ordering)
548 
549    Output Parameter:
550 .  aoout - the new application ordering
551 
552    Level: beginner
553 
554     Notes: The index sets isapp and ispetsc must contain the all the integers 0 to napp-1 (where napp is the length of the index sets) with no duplicates;
555            that is there cannot be any "holes".
556            Comparing with AOCreateBasicIS(), this routine trades memory with message communication.
557 .keywords: AO, create
558 
559 .seealso: AOCreateMemoryScalable(),  AODestroy()
560 @*/
561 PetscErrorCode  AOCreateMemoryScalableIS(IS isapp,IS ispetsc,AO *aoout)
562 {
563   PetscErrorCode ierr;
564   MPI_Comm       comm;
565   AO             ao;
566 
567   PetscFunctionBegin;
568   ierr   = PetscObjectGetComm((PetscObject)isapp,&comm);CHKERRQ(ierr);
569   ierr   = AOCreate(comm,&ao);CHKERRQ(ierr);
570   ierr   = AOSetIS(ao,isapp,ispetsc);CHKERRQ(ierr);
571   ierr   = AOSetType(ao,AOMEMORYSCALABLE);CHKERRQ(ierr);
572   *aoout = ao;
573   PetscFunctionReturn(0);
574 }
575