xref: /petsc/src/vec/is/ao/impls/memscalable/aomemscalable.c (revision 8a40300834377a32ebc3f9206ebc8531bcb9209f)
11447629fSBarry Smith /*
21447629fSBarry Smith     The memory scalable AO application ordering routines. These store the
3d38ec673SBarry Smith   orderings on each process for that process' range of values, this is more memory-efficient than `AOBASIC`
41447629fSBarry Smith */
51447629fSBarry Smith 
61447629fSBarry Smith #include <../src/vec/is/ao/aoimpl.h> /*I  "petscao.h"   I*/
71447629fSBarry Smith 
81447629fSBarry Smith typedef struct {
91447629fSBarry Smith   PetscInt   *app_loc;   /* app_loc[i] is the partner for the ith local PETSc slot */
101447629fSBarry Smith   PetscInt   *petsc_loc; /* petsc_loc[j] is the partner for the jth local app slot */
111447629fSBarry Smith   PetscLayout map;       /* determines the local sizes of ao */
121447629fSBarry Smith } AO_MemoryScalable;
131447629fSBarry Smith 
141447629fSBarry Smith /*
156bd6ae52SBarry Smith        All processors ship the data to process 0 to be printed; note that this is not scalable because
166bd6ae52SBarry Smith        process 0 allocates space for all the orderings entry across all the processes
171447629fSBarry Smith */
18da8c939bSJacob Faibussowitsch static PetscErrorCode AOView_MemoryScalable(AO ao, PetscViewer viewer)
19d71ae5a4SJacob Faibussowitsch {
201447629fSBarry Smith   PetscMPIInt        rank, size;
211447629fSBarry Smith   AO_MemoryScalable *aomems = (AO_MemoryScalable *)ao->data;
221447629fSBarry Smith   PetscBool          iascii;
231447629fSBarry Smith   PetscMPIInt        tag_app, tag_petsc;
241447629fSBarry Smith   PetscLayout        map = aomems->map;
256497c311SBarry Smith   PetscInt          *app, *app_loc, *petsc, *petsc_loc, len, j;
261447629fSBarry Smith   MPI_Status         status;
271447629fSBarry Smith 
281447629fSBarry Smith   PetscFunctionBegin;
299566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
3028b400f6SJacob Faibussowitsch   PetscCheck(iascii, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not supported for AO MemoryScalable", ((PetscObject)viewer)->type_name);
311447629fSBarry Smith 
329566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)ao), &rank));
339566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)ao), &size));
341447629fSBarry Smith 
359566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetNewTag((PetscObject)ao, &tag_app));
369566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetNewTag((PetscObject)ao, &tag_petsc));
371447629fSBarry Smith 
38dd400576SPatrick Sanan   if (rank == 0) {
399566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "Number of elements in ordering %" PetscInt_FMT "\n", ao->N));
409566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "PETSc->App  App->PETSc\n"));
411447629fSBarry Smith 
429566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(map->N, &app, map->N, &petsc));
431447629fSBarry Smith     len = map->n;
441447629fSBarry Smith     /* print local AO */
459566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "Process [%d]\n", rank));
466497c311SBarry Smith     for (PetscInt i = 0; i < len; i++) PetscCall(PetscViewerASCIIPrintf(viewer, "%3" PetscInt_FMT "  %3" PetscInt_FMT "    %3" PetscInt_FMT "  %3" PetscInt_FMT "\n", i, aomems->app_loc[i], i, aomems->petsc_loc[i]));
471447629fSBarry Smith 
481447629fSBarry Smith     /* recv and print off-processor's AO */
496497c311SBarry Smith     for (PetscMPIInt i = 1; i < size; i++) {
501447629fSBarry Smith       len       = map->range[i + 1] - map->range[i];
511447629fSBarry Smith       app_loc   = app + map->range[i];
521447629fSBarry Smith       petsc_loc = petsc + map->range[i];
536497c311SBarry Smith       PetscCallMPI(MPIU_Recv(app_loc, len, MPIU_INT, i, tag_app, PetscObjectComm((PetscObject)ao), &status));
546497c311SBarry Smith       PetscCallMPI(MPIU_Recv(petsc_loc, len, MPIU_INT, i, tag_petsc, PetscObjectComm((PetscObject)ao), &status));
556497c311SBarry Smith       PetscCall(PetscViewerASCIIPrintf(viewer, "Process [%d]\n", i));
5648a46eb9SPierre Jolivet       for (j = 0; j < len; j++) PetscCall(PetscViewerASCIIPrintf(viewer, "%3" PetscInt_FMT "  %3" PetscInt_FMT "    %3" PetscInt_FMT "  %3" PetscInt_FMT "\n", map->range[i] + j, app_loc[j], map->range[i] + j, petsc_loc[j]));
571447629fSBarry Smith     }
589566063dSJacob Faibussowitsch     PetscCall(PetscFree2(app, petsc));
591447629fSBarry Smith 
601447629fSBarry Smith   } else {
611447629fSBarry Smith     /* send values */
626497c311SBarry Smith     PetscCallMPI(MPIU_Send((void *)aomems->app_loc, map->n, MPIU_INT, 0, tag_app, PetscObjectComm((PetscObject)ao)));
636497c311SBarry Smith     PetscCallMPI(MPIU_Send((void *)aomems->petsc_loc, map->n, MPIU_INT, 0, tag_petsc, PetscObjectComm((PetscObject)ao)));
641447629fSBarry Smith   }
659566063dSJacob Faibussowitsch   PetscCall(PetscViewerFlush(viewer));
663ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
671447629fSBarry Smith }
681447629fSBarry Smith 
69da8c939bSJacob Faibussowitsch static PetscErrorCode AODestroy_MemoryScalable(AO ao)
70d71ae5a4SJacob Faibussowitsch {
711447629fSBarry Smith   AO_MemoryScalable *aomems = (AO_MemoryScalable *)ao->data;
721447629fSBarry Smith 
731447629fSBarry Smith   PetscFunctionBegin;
749566063dSJacob Faibussowitsch   PetscCall(PetscFree2(aomems->app_loc, aomems->petsc_loc));
759566063dSJacob Faibussowitsch   PetscCall(PetscLayoutDestroy(&aomems->map));
769566063dSJacob Faibussowitsch   PetscCall(PetscFree(aomems));
773ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
781447629fSBarry Smith }
791447629fSBarry Smith 
801447629fSBarry Smith /*
811447629fSBarry Smith    Input Parameters:
821447629fSBarry Smith +   ao - the application ordering context
831447629fSBarry Smith .   n  - the number of integers in ia[]
841447629fSBarry Smith .   ia - the integers; these are replaced with their mapped value
851447629fSBarry Smith -   maploc - app_loc or petsc_loc in struct "AO_MemoryScalable"
861447629fSBarry Smith 
871447629fSBarry Smith    Output Parameter:
881447629fSBarry Smith .   ia - the mapped interges
891447629fSBarry Smith  */
90da8c939bSJacob Faibussowitsch static PetscErrorCode AOMap_MemoryScalable_private(AO ao, PetscInt n, PetscInt *ia, const PetscInt *maploc)
91d71ae5a4SJacob Faibussowitsch {
921447629fSBarry Smith   AO_MemoryScalable *aomems = (AO_MemoryScalable *)ao->data;
931447629fSBarry Smith   MPI_Comm           comm;
946497c311SBarry Smith   PetscMPIInt        rank, size, tag1, tag2, count;
956497c311SBarry Smith   PetscMPIInt       *owner, j;
966497c311SBarry Smith   PetscInt           nmax, *sindices, *rindices, idx, lastidx, *sindices2, *rindices2, *sizes, *start;
976497c311SBarry Smith   PetscMPIInt        nreceives, nsends;
986bd6ae52SBarry Smith   const PetscInt    *owners = aomems->map->range;
991447629fSBarry Smith   MPI_Request       *send_waits, *recv_waits, *send_waits2, *recv_waits2;
1001447629fSBarry Smith   MPI_Status         recv_status;
1016497c311SBarry Smith   PetscMPIInt        source, widx;
1026497c311SBarry Smith   PetscCount         nindices;
1036497c311SBarry Smith   PetscInt          *rbuf, *sbuf, inreceives;
1041447629fSBarry Smith   MPI_Status        *send_status, *send_status2;
1051447629fSBarry Smith 
1061447629fSBarry Smith   PetscFunctionBegin;
1079566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)ao, &comm));
1089566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1099566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
1101447629fSBarry Smith 
1111447629fSBarry Smith   /*  first count number of contributors to each processor */
1129566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(size, &start));
1139566063dSJacob Faibussowitsch   PetscCall(PetscCalloc2(2 * size, &sizes, n, &owner));
1141447629fSBarry Smith 
1151447629fSBarry Smith   j       = 0;
1161447629fSBarry Smith   lastidx = -1;
1176497c311SBarry Smith   for (PetscInt i = 0; i < n; i++) {
1186bd6ae52SBarry Smith     if (ia[i] < 0) owner[i] = -1;      /* mark negative entries (which are not to be mapped) with a special negative value */
1196bd6ae52SBarry Smith     if (ia[i] >= ao->N) owner[i] = -2; /* mark out of range entries with special negative value */
1206bd6ae52SBarry Smith     else {
1211447629fSBarry Smith       /* if indices are NOT locally sorted, need to start search at the beginning */
1221447629fSBarry Smith       if (lastidx > (idx = ia[i])) j = 0;
1231447629fSBarry Smith       lastidx = idx;
1241447629fSBarry Smith       for (; j < size; j++) {
1251447629fSBarry Smith         if (idx >= owners[j] && idx < owners[j + 1]) {
12676ec1555SBarry Smith           sizes[2 * j]++;       /* num of indices to be sent */
12776ec1555SBarry Smith           sizes[2 * j + 1] = 1; /* send to proc[j] */
1281447629fSBarry Smith           owner[i]         = j;
1291447629fSBarry Smith           break;
1301447629fSBarry Smith         }
1311447629fSBarry Smith       }
1321447629fSBarry Smith     }
1336bd6ae52SBarry Smith   }
13476ec1555SBarry Smith   sizes[2 * rank] = sizes[2 * rank + 1] = 0; /* do not receive from self! */
1351447629fSBarry Smith   nsends                                = 0;
1366497c311SBarry Smith   for (PetscMPIInt i = 0; i < size; i++) nsends += sizes[2 * i + 1];
1371447629fSBarry Smith 
1381447629fSBarry Smith   /* inform other processors of number of messages and max length*/
1396497c311SBarry Smith   PetscCall(PetscMaxSum(comm, sizes, &nmax, &inreceives));
1406497c311SBarry Smith   PetscCall(PetscMPIIntCast(inreceives, &nreceives));
1411447629fSBarry Smith 
1421447629fSBarry Smith   /* allocate arrays */
1439566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetNewTag((PetscObject)ao, &tag1));
1449566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetNewTag((PetscObject)ao, &tag2));
1451447629fSBarry Smith 
1469566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(nreceives * nmax, &rindices, nreceives, &recv_waits));
1479566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(nsends * nmax, &rindices2, nsends, &recv_waits2));
1481447629fSBarry Smith 
1499566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(n, &sindices, nsends, &send_waits, nsends, &send_status));
1509566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(n, &sindices2, nreceives, &send_waits2, nreceives, &send_status2));
1511447629fSBarry Smith 
1521447629fSBarry Smith   /* post 1st receives: receive others requests
1531447629fSBarry Smith      since we don't know how long each individual message is we
1541447629fSBarry Smith      allocate the largest needed buffer for each receive. Potentially
1551447629fSBarry Smith      this is a lot of wasted space.
1561447629fSBarry Smith   */
1576497c311SBarry Smith   for (PetscMPIInt i = 0; i < nreceives; i++) PetscCallMPI(MPIU_Irecv(rindices + nmax * i, nmax, MPIU_INT, MPI_ANY_SOURCE, tag1, comm, recv_waits + i));
1581447629fSBarry Smith 
1591447629fSBarry Smith   /* do 1st sends:
1601447629fSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
1611447629fSBarry Smith          the ith processor
1621447629fSBarry Smith   */
1631447629fSBarry Smith   start[0] = 0;
1646497c311SBarry Smith   for (PetscMPIInt i = 1; i < size; i++) start[i] = start[i - 1] + sizes[2 * i - 2];
1656497c311SBarry Smith   for (PetscInt i = 0; i < n; i++) {
1661447629fSBarry Smith     j = owner[i];
1676bd6ae52SBarry Smith     if (j == -1) continue; /* do not remap negative entries in ia[] */
1689371c9d4SSatish Balay     else if (j == -2) { /* out of range entries get mapped to -1 */ ia[i] = -1;
1696bd6ae52SBarry Smith       continue;
1706bd6ae52SBarry Smith     } else if (j != rank) {
1711447629fSBarry Smith       sindices[start[j]++] = ia[i];
1721447629fSBarry Smith     } else { /* compute my own map */
1731447629fSBarry Smith       ia[i] = maploc[ia[i] - owners[rank]];
1741447629fSBarry Smith     }
1751447629fSBarry Smith   }
1761447629fSBarry Smith 
1771447629fSBarry Smith   start[0] = 0;
1786497c311SBarry Smith   for (PetscMPIInt i = 1; i < size; i++) start[i] = start[i - 1] + sizes[2 * i - 2];
1796497c311SBarry Smith   count = 0;
1806497c311SBarry Smith   for (PetscMPIInt i = 0; i < size; i++) {
18176ec1555SBarry Smith     if (sizes[2 * i + 1]) {
1821447629fSBarry Smith       /* send my request to others */
1836497c311SBarry Smith       PetscCallMPI(MPIU_Isend(sindices + start[i], sizes[2 * i], MPIU_INT, i, tag1, comm, send_waits + count));
1841447629fSBarry Smith       /* post receive for the answer of my request */
1856497c311SBarry Smith       PetscCallMPI(MPIU_Irecv(sindices2 + start[i], sizes[2 * i], MPIU_INT, i, tag2, comm, recv_waits2 + count));
1861447629fSBarry Smith       count++;
1871447629fSBarry Smith     }
1881447629fSBarry Smith   }
1896497c311SBarry Smith   PetscCheck(nsends == count, comm, PETSC_ERR_SUP, "nsends %d != count %d", nsends, count);
1901447629fSBarry Smith 
1911447629fSBarry Smith   /* wait on 1st sends */
1926497c311SBarry Smith   if (nsends) PetscCallMPI(MPI_Waitall((PetscMPIInt)nsends, send_waits, send_status));
1931447629fSBarry Smith 
1941447629fSBarry Smith   /* 1st recvs: other's requests */
1951447629fSBarry Smith   for (j = 0; j < nreceives; j++) {
1966497c311SBarry Smith     PetscCallMPI(MPI_Waitany((PetscMPIInt)nreceives, recv_waits, &widx, &recv_status)); /* idx: index of handle for operation that completed */
1976497c311SBarry Smith     PetscCallMPI(MPIU_Get_count(&recv_status, MPIU_INT, &nindices));
1981447629fSBarry Smith     rbuf   = rindices + nmax * widx; /* global index */
1991447629fSBarry Smith     source = recv_status.MPI_SOURCE;
2001447629fSBarry Smith 
2011447629fSBarry Smith     /* compute mapping */
2021447629fSBarry Smith     sbuf = rbuf;
2036497c311SBarry Smith     for (PetscCount i = 0; i < nindices; i++) sbuf[i] = maploc[rbuf[i] - owners[rank]];
2041447629fSBarry Smith 
2051447629fSBarry Smith     /* send mapping back to the sender */
2066497c311SBarry Smith     PetscCallMPI(MPIU_Isend(sbuf, nindices, MPIU_INT, source, tag2, comm, send_waits2 + widx));
2071447629fSBarry Smith   }
2081447629fSBarry Smith 
2091447629fSBarry Smith   /* wait on 2nd sends */
2106497c311SBarry Smith   if (nreceives) PetscCallMPI(MPI_Waitall((PetscMPIInt)nreceives, send_waits2, send_status2));
2111447629fSBarry Smith 
2121447629fSBarry Smith   /* 2nd recvs: for the answer of my request */
2131447629fSBarry Smith   for (j = 0; j < nsends; j++) {
2146497c311SBarry Smith     PetscCallMPI(MPI_Waitany((PetscMPIInt)nsends, recv_waits2, &widx, &recv_status));
2156497c311SBarry Smith     PetscCallMPI(MPIU_Get_count(&recv_status, MPIU_INT, &nindices));
2161447629fSBarry Smith     source = recv_status.MPI_SOURCE;
2171447629fSBarry Smith     /* pack output ia[] */
2181447629fSBarry Smith     rbuf  = sindices2 + start[source];
2191447629fSBarry Smith     count = 0;
2206497c311SBarry Smith     for (PetscCount i = 0; i < n; i++) {
2211447629fSBarry Smith       if (source == owner[i]) ia[i] = rbuf[count++];
2221447629fSBarry Smith     }
2231447629fSBarry Smith   }
2241447629fSBarry Smith 
2251447629fSBarry Smith   /* free arrays */
2269566063dSJacob Faibussowitsch   PetscCall(PetscFree(start));
2279566063dSJacob Faibussowitsch   PetscCall(PetscFree2(sizes, owner));
2289566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rindices, recv_waits));
2299566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rindices2, recv_waits2));
2309566063dSJacob Faibussowitsch   PetscCall(PetscFree3(sindices, send_waits, send_status));
2319566063dSJacob Faibussowitsch   PetscCall(PetscFree3(sindices2, send_waits2, send_status2));
2323ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2331447629fSBarry Smith }
2341447629fSBarry Smith 
235da8c939bSJacob Faibussowitsch static PetscErrorCode AOPetscToApplication_MemoryScalable(AO ao, PetscInt n, PetscInt *ia)
236d71ae5a4SJacob Faibussowitsch {
2371447629fSBarry Smith   AO_MemoryScalable *aomems  = (AO_MemoryScalable *)ao->data;
2381447629fSBarry Smith   PetscInt          *app_loc = aomems->app_loc;
2391447629fSBarry Smith 
2401447629fSBarry Smith   PetscFunctionBegin;
2419566063dSJacob Faibussowitsch   PetscCall(AOMap_MemoryScalable_private(ao, n, ia, app_loc));
2423ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2431447629fSBarry Smith }
2441447629fSBarry Smith 
245da8c939bSJacob Faibussowitsch static PetscErrorCode AOApplicationToPetsc_MemoryScalable(AO ao, PetscInt n, PetscInt *ia)
246d71ae5a4SJacob Faibussowitsch {
2471447629fSBarry Smith   AO_MemoryScalable *aomems    = (AO_MemoryScalable *)ao->data;
2481447629fSBarry Smith   PetscInt          *petsc_loc = aomems->petsc_loc;
2491447629fSBarry Smith 
2501447629fSBarry Smith   PetscFunctionBegin;
2519566063dSJacob Faibussowitsch   PetscCall(AOMap_MemoryScalable_private(ao, n, ia, petsc_loc));
2523ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2531447629fSBarry Smith }
2541447629fSBarry Smith 
255da8c939bSJacob Faibussowitsch static const struct _AOOps AOOps_MemoryScalable = {
256267267bdSJacob Faibussowitsch   PetscDesignatedInitializer(view, AOView_MemoryScalable),
257267267bdSJacob Faibussowitsch   PetscDesignatedInitializer(destroy, AODestroy_MemoryScalable),
258267267bdSJacob Faibussowitsch   PetscDesignatedInitializer(petsctoapplication, AOPetscToApplication_MemoryScalable),
259267267bdSJacob Faibussowitsch   PetscDesignatedInitializer(applicationtopetsc, AOApplicationToPetsc_MemoryScalable),
260*8a403008SStefano Zampini   PetscDesignatedInitializer(petsctoapplicationpermuteint, NULL),
261*8a403008SStefano Zampini   PetscDesignatedInitializer(applicationtopetscpermuteint, NULL),
262*8a403008SStefano Zampini   PetscDesignatedInitializer(petsctoapplicationpermutereal, NULL),
263*8a403008SStefano Zampini   PetscDesignatedInitializer(applicationtopetscpermutereal, NULL),
2641447629fSBarry Smith };
2651447629fSBarry Smith 
266da8c939bSJacob Faibussowitsch static PetscErrorCode AOCreateMemoryScalable_private(MPI_Comm comm, PetscInt napp, const PetscInt from_array[], const PetscInt to_array[], AO ao, PetscInt *aomap_loc)
267d71ae5a4SJacob Faibussowitsch {
2681447629fSBarry Smith   AO_MemoryScalable *aomems  = (AO_MemoryScalable *)ao->data;
2691447629fSBarry Smith   PetscLayout        map     = aomems->map;
2701447629fSBarry Smith   PetscInt           n_local = map->n, i, j;
2711447629fSBarry Smith   PetscMPIInt        rank, size, tag;
27276ec1555SBarry Smith   PetscInt          *owner, *start, *sizes, nsends, nreceives;
2731447629fSBarry Smith   PetscInt           nmax, count, *sindices, *rindices, idx, lastidx;
2741447629fSBarry Smith   PetscInt          *owners = aomems->map->range;
2751447629fSBarry Smith   MPI_Request       *send_waits, *recv_waits;
2761447629fSBarry Smith   MPI_Status         recv_status;
2776497c311SBarry Smith   PetscMPIInt        widx;
2781447629fSBarry Smith   PetscInt          *rbuf;
2791447629fSBarry Smith   PetscInt           n = napp, ip, ia;
2801447629fSBarry Smith   MPI_Status        *send_status;
2816497c311SBarry Smith   PetscCount         nindices;
2821447629fSBarry Smith 
2831447629fSBarry Smith   PetscFunctionBegin;
2849566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(aomap_loc, n_local));
2851447629fSBarry Smith 
2869566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
2879566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
2881447629fSBarry Smith 
2891447629fSBarry Smith   /*  first count number of contributors (of from_array[]) to each processor */
2909566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(2 * size, &sizes));
2919566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n, &owner));
2921447629fSBarry Smith 
2931447629fSBarry Smith   j       = 0;
2941447629fSBarry Smith   lastidx = -1;
2951447629fSBarry Smith   for (i = 0; i < n; i++) {
2961447629fSBarry Smith     /* if indices are NOT locally sorted, need to start search at the beginning */
2971447629fSBarry Smith     if (lastidx > (idx = from_array[i])) j = 0;
2981447629fSBarry Smith     lastidx = idx;
2991447629fSBarry Smith     for (; j < size; j++) {
3001447629fSBarry Smith       if (idx >= owners[j] && idx < owners[j + 1]) {
30176ec1555SBarry Smith         sizes[2 * j] += 2;    /* num of indices to be sent - in pairs (ip,ia) */
30276ec1555SBarry Smith         sizes[2 * j + 1] = 1; /* send to proc[j] */
3031447629fSBarry Smith         owner[i]         = j;
3041447629fSBarry Smith         break;
3051447629fSBarry Smith       }
3061447629fSBarry Smith     }
3071447629fSBarry Smith   }
30876ec1555SBarry Smith   sizes[2 * rank] = sizes[2 * rank + 1] = 0; /* do not receive from self! */
3091447629fSBarry Smith   nsends                                = 0;
31076ec1555SBarry Smith   for (i = 0; i < size; i++) nsends += sizes[2 * i + 1];
3111447629fSBarry Smith 
3121447629fSBarry Smith   /* inform other processors of number of messages and max length*/
3139566063dSJacob Faibussowitsch   PetscCall(PetscMaxSum(comm, sizes, &nmax, &nreceives));
3141447629fSBarry Smith 
3151447629fSBarry Smith   /* allocate arrays */
3169566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetNewTag((PetscObject)ao, &tag));
3179566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(nreceives * nmax, &rindices, nreceives, &recv_waits));
3189566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(2 * n, &sindices, nsends, &send_waits, nsends, &send_status));
3199566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(size, &start));
3201447629fSBarry Smith 
3211447629fSBarry Smith   /* post receives: */
3226497c311SBarry Smith   for (i = 0; i < nreceives; i++) PetscCallMPI(MPIU_Irecv(rindices + nmax * i, nmax, MPIU_INT, MPI_ANY_SOURCE, tag, comm, recv_waits + i));
3231447629fSBarry Smith 
3241447629fSBarry Smith   /* do sends:
3251447629fSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3261447629fSBarry Smith          the ith processor
3271447629fSBarry Smith   */
3281447629fSBarry Smith   start[0] = 0;
32976ec1555SBarry Smith   for (i = 1; i < size; i++) start[i] = start[i - 1] + sizes[2 * i - 2];
3301447629fSBarry Smith   for (i = 0; i < n; i++) {
3311447629fSBarry Smith     j = owner[i];
3321447629fSBarry Smith     if (j != rank) {
3331447629fSBarry Smith       ip                   = from_array[i];
3341447629fSBarry Smith       ia                   = to_array[i];
3351447629fSBarry Smith       sindices[start[j]++] = ip;
3361447629fSBarry Smith       sindices[start[j]++] = ia;
3371447629fSBarry Smith     } else { /* compute my own map */
3381447629fSBarry Smith       ip            = from_array[i] - owners[rank];
3391447629fSBarry Smith       ia            = to_array[i];
3401447629fSBarry Smith       aomap_loc[ip] = ia;
3411447629fSBarry Smith     }
3421447629fSBarry Smith   }
3431447629fSBarry Smith 
3441447629fSBarry Smith   start[0] = 0;
3456497c311SBarry Smith   for (PetscMPIInt i = 1; i < size; i++) start[i] = start[i - 1] + sizes[2 * i - 2];
3466497c311SBarry Smith   count = 0;
3476497c311SBarry Smith   for (PetscMPIInt i = 0; i < size; i++) {
34876ec1555SBarry Smith     if (sizes[2 * i + 1]) {
3496497c311SBarry Smith       PetscCallMPI(MPIU_Isend(sindices + start[i], sizes[2 * i], MPIU_INT, i, tag, comm, send_waits + count));
3501447629fSBarry Smith       count++;
3511447629fSBarry Smith     }
3521447629fSBarry Smith   }
35308401ef6SPierre Jolivet   PetscCheck(nsends == count, comm, PETSC_ERR_SUP, "nsends %" PetscInt_FMT " != count %" PetscInt_FMT, nsends, count);
3541447629fSBarry Smith 
3551447629fSBarry Smith   /* wait on sends */
3566497c311SBarry Smith   if (nsends) PetscCallMPI(MPI_Waitall((PetscMPIInt)nsends, send_waits, send_status));
3571447629fSBarry Smith 
3581447629fSBarry Smith   /* recvs */
3591447629fSBarry Smith   count = 0;
3601447629fSBarry Smith   for (j = nreceives; j > 0; j--) {
3616497c311SBarry Smith     PetscCallMPI(MPI_Waitany((PetscMPIInt)nreceives, recv_waits, &widx, &recv_status));
3626497c311SBarry Smith     PetscCallMPI(MPIU_Get_count(&recv_status, MPIU_INT, &nindices));
3631447629fSBarry Smith     rbuf = rindices + nmax * widx; /* global index */
3641447629fSBarry Smith 
3651447629fSBarry Smith     /* compute local mapping */
3661447629fSBarry Smith     for (i = 0; i < nindices; i += 2) {       /* pack aomap_loc */
3671447629fSBarry Smith       ip            = rbuf[i] - owners[rank]; /* local index */
3681447629fSBarry Smith       ia            = rbuf[i + 1];
3691447629fSBarry Smith       aomap_loc[ip] = ia;
3701447629fSBarry Smith     }
3711447629fSBarry Smith     count++;
3721447629fSBarry Smith   }
3731447629fSBarry Smith 
3749566063dSJacob Faibussowitsch   PetscCall(PetscFree(start));
3759566063dSJacob Faibussowitsch   PetscCall(PetscFree3(sindices, send_waits, send_status));
3769566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rindices, recv_waits));
3779566063dSJacob Faibussowitsch   PetscCall(PetscFree(owner));
3789566063dSJacob Faibussowitsch   PetscCall(PetscFree(sizes));
3793ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3801447629fSBarry Smith }
3811447629fSBarry Smith 
382da8c939bSJacob Faibussowitsch PETSC_INTERN PetscErrorCode AOCreate_MemoryScalable(AO ao)
383d71ae5a4SJacob Faibussowitsch {
3841447629fSBarry Smith   IS                 isapp = ao->isapp, ispetsc = ao->ispetsc;
3851447629fSBarry Smith   const PetscInt    *mypetsc, *myapp;
3861447629fSBarry Smith   PetscInt           napp, n_local, N, i, start, *petsc, *lens, *disp;
3871447629fSBarry Smith   MPI_Comm           comm;
3881447629fSBarry Smith   AO_MemoryScalable *aomems;
3891447629fSBarry Smith   PetscLayout        map;
3901447629fSBarry Smith   PetscMPIInt        size, rank;
3911447629fSBarry Smith 
3921447629fSBarry Smith   PetscFunctionBegin;
39328b400f6SJacob Faibussowitsch   PetscCheck(isapp, PetscObjectComm((PetscObject)ao), PETSC_ERR_ARG_WRONGSTATE, "AOSetIS() must be called before AOSetType()");
3941447629fSBarry Smith   /* create special struct aomems */
3954dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&aomems));
3961447629fSBarry Smith   ao->data   = (void *)aomems;
397aea10558SJacob Faibussowitsch   ao->ops[0] = AOOps_MemoryScalable;
3989566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)ao, AOMEMORYSCALABLE));
3991447629fSBarry Smith 
4001447629fSBarry Smith   /* transmit all local lengths of isapp to all processors */
4019566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)isapp, &comm));
4029566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
4039566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4049566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(size, &lens, size, &disp));
4059566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(isapp, &napp));
4069566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Allgather(&napp, 1, MPIU_INT, lens, 1, MPIU_INT, comm));
4071447629fSBarry Smith 
4081447629fSBarry Smith   N = 0;
4091447629fSBarry Smith   for (i = 0; i < size; i++) {
4101447629fSBarry Smith     disp[i] = N;
4111447629fSBarry Smith     N += lens[i];
4121447629fSBarry Smith   }
4131447629fSBarry Smith 
4141447629fSBarry Smith   /* If ispetsc is 0 then use "natural" numbering */
4151447629fSBarry Smith   if (napp) {
4161447629fSBarry Smith     if (!ispetsc) {
4171447629fSBarry Smith       start = disp[rank];
4189566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(napp + 1, &petsc));
4191447629fSBarry Smith       for (i = 0; i < napp; i++) petsc[i] = start + i;
4201447629fSBarry Smith     } else {
4219566063dSJacob Faibussowitsch       PetscCall(ISGetIndices(ispetsc, &mypetsc));
4221447629fSBarry Smith       petsc = (PetscInt *)mypetsc;
4231447629fSBarry Smith     }
4244a2f8832SBarry Smith   } else {
4254a2f8832SBarry Smith     petsc = NULL;
4261447629fSBarry Smith   }
4271447629fSBarry Smith 
4281447629fSBarry Smith   /* create a map with global size N - used to determine the local sizes of ao - shall we use local napp instead of N? */
4299566063dSJacob Faibussowitsch   PetscCall(PetscLayoutCreate(comm, &map));
4301447629fSBarry Smith   map->bs = 1;
4311447629fSBarry Smith   map->N  = N;
4329566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(map));
4331447629fSBarry Smith 
4341447629fSBarry Smith   ao->N       = N;
4351447629fSBarry Smith   ao->n       = map->n;
4361447629fSBarry Smith   aomems->map = map;
4371447629fSBarry Smith 
4381447629fSBarry Smith   /* create distributed indices app_loc: petsc->app and petsc_loc: app->petsc */
4391447629fSBarry Smith   n_local = map->n;
4409566063dSJacob Faibussowitsch   PetscCall(PetscCalloc2(n_local, &aomems->app_loc, n_local, &aomems->petsc_loc));
4419566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isapp, &myapp));
4421447629fSBarry Smith 
4439566063dSJacob Faibussowitsch   PetscCall(AOCreateMemoryScalable_private(comm, napp, petsc, myapp, ao, aomems->app_loc));
4449566063dSJacob Faibussowitsch   PetscCall(AOCreateMemoryScalable_private(comm, napp, myapp, petsc, ao, aomems->petsc_loc));
4451447629fSBarry Smith 
4469566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isapp, &myapp));
4471447629fSBarry Smith   if (napp) {
4481447629fSBarry Smith     if (ispetsc) {
4499566063dSJacob Faibussowitsch       PetscCall(ISRestoreIndices(ispetsc, &mypetsc));
4501447629fSBarry Smith     } else {
4519566063dSJacob Faibussowitsch       PetscCall(PetscFree(petsc));
4521447629fSBarry Smith     }
4531447629fSBarry Smith   }
4549566063dSJacob Faibussowitsch   PetscCall(PetscFree2(lens, disp));
4553ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4561447629fSBarry Smith }
4571447629fSBarry Smith 
458cc4c1da9SBarry Smith /*@
4591447629fSBarry Smith   AOCreateMemoryScalable - Creates a memory scalable application ordering using two integer arrays.
4601447629fSBarry Smith 
461d083f849SBarry Smith   Collective
4621447629fSBarry Smith 
4631447629fSBarry Smith   Input Parameters:
464cab54364SBarry Smith + comm    - MPI communicator that is to share the `AO`
465d38ec673SBarry Smith . napp    - size of `myapp` and `mypetsc`
4661447629fSBarry Smith . myapp   - integer array that defines an ordering
467b6971eaeSBarry Smith - mypetsc - integer array that defines another ordering (may be `NULL` to indicate the natural ordering, that is 0,1,2,3,...)
4681447629fSBarry Smith 
4691447629fSBarry Smith   Output Parameter:
4701447629fSBarry Smith . aoout - the new application ordering
4711447629fSBarry Smith 
4721447629fSBarry Smith   Level: beginner
4731447629fSBarry Smith 
474cab54364SBarry Smith   Note:
475b6971eaeSBarry Smith   The arrays `myapp` and `mypetsc` must contain the all the integers 0 to `napp`-1 with no duplicates; that is there cannot be any "holes"
476cab54364SBarry Smith   in the indices. Use `AOCreateMapping()` or `AOCreateMappingIS()` if you wish to have "holes" in the indices.
477cab54364SBarry Smith   Comparing with `AOCreateBasic()`, this routine trades memory with message communication.
4781447629fSBarry Smith 
479cab54364SBarry Smith .seealso: [](sec_ao), [](sec_scatter), `AO`, `AOCreateMemoryScalableIS()`, `AODestroy()`, `AOPetscToApplication()`, `AOApplicationToPetsc()`
4801447629fSBarry Smith @*/
481d71ae5a4SJacob Faibussowitsch PetscErrorCode AOCreateMemoryScalable(MPI_Comm comm, PetscInt napp, const PetscInt myapp[], const PetscInt mypetsc[], AO *aoout)
482d71ae5a4SJacob Faibussowitsch {
4831447629fSBarry Smith   IS              isapp, ispetsc;
4841447629fSBarry Smith   const PetscInt *app = myapp, *petsc = mypetsc;
4851447629fSBarry Smith 
4861447629fSBarry Smith   PetscFunctionBegin;
4879566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(comm, napp, app, PETSC_USE_POINTER, &isapp));
4881447629fSBarry Smith   if (mypetsc) {
4899566063dSJacob Faibussowitsch     PetscCall(ISCreateGeneral(comm, napp, petsc, PETSC_USE_POINTER, &ispetsc));
4901447629fSBarry Smith   } else {
4911447629fSBarry Smith     ispetsc = NULL;
4921447629fSBarry Smith   }
4939566063dSJacob Faibussowitsch   PetscCall(AOCreateMemoryScalableIS(isapp, ispetsc, aoout));
4949566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&isapp));
49548a46eb9SPierre Jolivet   if (mypetsc) PetscCall(ISDestroy(&ispetsc));
4963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4971447629fSBarry Smith }
4981447629fSBarry Smith 
4995d83a8b1SBarry Smith /*@
5001447629fSBarry Smith   AOCreateMemoryScalableIS - Creates a memory scalable application ordering using two index sets.
5011447629fSBarry Smith 
502c3339decSBarry Smith   Collective
5031447629fSBarry Smith 
5041447629fSBarry Smith   Input Parameters:
5051447629fSBarry Smith + isapp   - index set that defines an ordering
506b6971eaeSBarry Smith - ispetsc - index set that defines another ordering (may be `NULL` to use the natural ordering)
5071447629fSBarry Smith 
5081447629fSBarry Smith   Output Parameter:
5091447629fSBarry Smith . aoout - the new application ordering
5101447629fSBarry Smith 
5111447629fSBarry Smith   Level: beginner
5121447629fSBarry Smith 
51395452b02SPatrick Sanan   Notes:
514b6971eaeSBarry Smith   The index sets `isapp` and `ispetsc` must contain the all the integers 0 to napp-1 (where napp is the length of the index sets) with no duplicates;
5151447629fSBarry Smith   that is there cannot be any "holes".
516cab54364SBarry Smith 
517cab54364SBarry Smith   Comparing with `AOCreateBasicIS()`, this routine trades memory with message communication.
518cab54364SBarry Smith 
519cab54364SBarry Smith .seealso: [](sec_ao), [](sec_scatter), `AO`, `AOCreateBasicIS()`, `AOCreateMemoryScalable()`, `AODestroy()`
5201447629fSBarry Smith @*/
521d71ae5a4SJacob Faibussowitsch PetscErrorCode AOCreateMemoryScalableIS(IS isapp, IS ispetsc, AO *aoout)
522d71ae5a4SJacob Faibussowitsch {
5231447629fSBarry Smith   MPI_Comm comm;
5241447629fSBarry Smith   AO       ao;
5251447629fSBarry Smith 
5261447629fSBarry Smith   PetscFunctionBegin;
5279566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)isapp, &comm));
5289566063dSJacob Faibussowitsch   PetscCall(AOCreate(comm, &ao));
5299566063dSJacob Faibussowitsch   PetscCall(AOSetIS(ao, isapp, ispetsc));
5309566063dSJacob Faibussowitsch   PetscCall(AOSetType(ao, AOMEMORYSCALABLE));
5319566063dSJacob Faibussowitsch   PetscCall(AOViewFromOptions(ao, NULL, "-ao_view"));
5321447629fSBarry Smith   *aoout = ao;
5333ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
5341447629fSBarry Smith }
535