xref: /petsc/src/vec/is/ao/impls/memscalable/aomemscalable.c (revision c3339decea92175325d9368fa13196bcd0e0e58b)
11447629fSBarry Smith 
21447629fSBarry Smith /*
31447629fSBarry Smith     The memory scalable AO application ordering routines. These store the
46bd6ae52SBarry Smith   orderings on each processor for that processor's range of values
51447629fSBarry Smith */
61447629fSBarry Smith 
71447629fSBarry Smith #include <../src/vec/is/ao/aoimpl.h> /*I  "petscao.h"   I*/
81447629fSBarry Smith 
91447629fSBarry Smith typedef struct {
101447629fSBarry Smith   PetscInt   *app_loc;   /* app_loc[i] is the partner for the ith local PETSc slot */
111447629fSBarry Smith   PetscInt   *petsc_loc; /* petsc_loc[j] is the partner for the jth local app slot */
121447629fSBarry Smith   PetscLayout map;       /* determines the local sizes of ao */
131447629fSBarry Smith } AO_MemoryScalable;
141447629fSBarry Smith 
151447629fSBarry Smith /*
166bd6ae52SBarry Smith        All processors ship the data to process 0 to be printed; note that this is not scalable because
176bd6ae52SBarry Smith        process 0 allocates space for all the orderings entry across all the processes
181447629fSBarry Smith */
19d71ae5a4SJacob Faibussowitsch PetscErrorCode AOView_MemoryScalable(AO ao, PetscViewer viewer)
20d71ae5a4SJacob Faibussowitsch {
211447629fSBarry Smith   PetscMPIInt        rank, size;
221447629fSBarry Smith   AO_MemoryScalable *aomems = (AO_MemoryScalable *)ao->data;
231447629fSBarry Smith   PetscBool          iascii;
241447629fSBarry Smith   PetscMPIInt        tag_app, tag_petsc;
251447629fSBarry Smith   PetscLayout        map = aomems->map;
261447629fSBarry Smith   PetscInt          *app, *app_loc, *petsc, *petsc_loc, len, i, j;
271447629fSBarry Smith   MPI_Status         status;
281447629fSBarry Smith 
291447629fSBarry Smith   PetscFunctionBegin;
309566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
3128b400f6SJacob Faibussowitsch   PetscCheck(iascii, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not supported for AO MemoryScalable", ((PetscObject)viewer)->type_name);
321447629fSBarry Smith 
339566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)ao), &rank));
349566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)ao), &size));
351447629fSBarry Smith 
369566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetNewTag((PetscObject)ao, &tag_app));
379566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetNewTag((PetscObject)ao, &tag_petsc));
381447629fSBarry Smith 
39dd400576SPatrick Sanan   if (rank == 0) {
409566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "Number of elements in ordering %" PetscInt_FMT "\n", ao->N));
419566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "PETSc->App  App->PETSc\n"));
421447629fSBarry Smith 
439566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(map->N, &app, map->N, &petsc));
441447629fSBarry Smith     len = map->n;
451447629fSBarry Smith     /* print local AO */
469566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "Process [%d]\n", rank));
4748a46eb9SPierre Jolivet     for (i = 0; i < len; i++) PetscCall(PetscViewerASCIIPrintf(viewer, "%3" PetscInt_FMT "  %3" PetscInt_FMT "    %3" PetscInt_FMT "  %3" PetscInt_FMT "\n", i, aomems->app_loc[i], i, aomems->petsc_loc[i]));
481447629fSBarry Smith 
491447629fSBarry Smith     /* recv and print off-processor's AO */
501447629fSBarry Smith     for (i = 1; i < size; i++) {
511447629fSBarry Smith       len       = map->range[i + 1] - map->range[i];
521447629fSBarry Smith       app_loc   = app + map->range[i];
531447629fSBarry Smith       petsc_loc = petsc + map->range[i];
549566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Recv(app_loc, (PetscMPIInt)len, MPIU_INT, i, tag_app, PetscObjectComm((PetscObject)ao), &status));
559566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Recv(petsc_loc, (PetscMPIInt)len, MPIU_INT, i, tag_petsc, PetscObjectComm((PetscObject)ao), &status));
569566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "Process [%" PetscInt_FMT "]\n", i));
5748a46eb9SPierre Jolivet       for (j = 0; j < len; j++) PetscCall(PetscViewerASCIIPrintf(viewer, "%3" PetscInt_FMT "  %3" PetscInt_FMT "    %3" PetscInt_FMT "  %3" PetscInt_FMT "\n", map->range[i] + j, app_loc[j], map->range[i] + j, petsc_loc[j]));
581447629fSBarry Smith     }
599566063dSJacob Faibussowitsch     PetscCall(PetscFree2(app, petsc));
601447629fSBarry Smith 
611447629fSBarry Smith   } else {
621447629fSBarry Smith     /* send values */
639566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Send((void *)aomems->app_loc, map->n, MPIU_INT, 0, tag_app, PetscObjectComm((PetscObject)ao)));
649566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Send((void *)aomems->petsc_loc, map->n, MPIU_INT, 0, tag_petsc, PetscObjectComm((PetscObject)ao)));
651447629fSBarry Smith   }
669566063dSJacob Faibussowitsch   PetscCall(PetscViewerFlush(viewer));
671447629fSBarry Smith   PetscFunctionReturn(0);
681447629fSBarry Smith }
691447629fSBarry Smith 
70d71ae5a4SJacob Faibussowitsch PetscErrorCode AODestroy_MemoryScalable(AO ao)
71d71ae5a4SJacob Faibussowitsch {
721447629fSBarry Smith   AO_MemoryScalable *aomems = (AO_MemoryScalable *)ao->data;
731447629fSBarry Smith 
741447629fSBarry Smith   PetscFunctionBegin;
759566063dSJacob Faibussowitsch   PetscCall(PetscFree2(aomems->app_loc, aomems->petsc_loc));
769566063dSJacob Faibussowitsch   PetscCall(PetscLayoutDestroy(&aomems->map));
779566063dSJacob Faibussowitsch   PetscCall(PetscFree(aomems));
781447629fSBarry Smith   PetscFunctionReturn(0);
791447629fSBarry Smith }
801447629fSBarry Smith 
811447629fSBarry Smith /*
821447629fSBarry Smith    Input Parameters:
831447629fSBarry Smith +   ao - the application ordering context
841447629fSBarry Smith .   n  - the number of integers in ia[]
851447629fSBarry Smith .   ia - the integers; these are replaced with their mapped value
861447629fSBarry Smith -   maploc - app_loc or petsc_loc in struct "AO_MemoryScalable"
871447629fSBarry Smith 
881447629fSBarry Smith    Output Parameter:
891447629fSBarry Smith .   ia - the mapped interges
901447629fSBarry Smith  */
91d71ae5a4SJacob Faibussowitsch PetscErrorCode AOMap_MemoryScalable_private(AO ao, PetscInt n, PetscInt *ia, const PetscInt *maploc)
92d71ae5a4SJacob Faibussowitsch {
931447629fSBarry Smith   AO_MemoryScalable *aomems = (AO_MemoryScalable *)ao->data;
941447629fSBarry Smith   MPI_Comm           comm;
951447629fSBarry Smith   PetscMPIInt        rank, size, tag1, tag2;
9676ec1555SBarry Smith   PetscInt          *owner, *start, *sizes, nsends, nreceives;
971447629fSBarry Smith   PetscInt           nmax, count, *sindices, *rindices, i, j, idx, lastidx, *sindices2, *rindices2;
986bd6ae52SBarry Smith   const PetscInt    *owners = aomems->map->range;
991447629fSBarry Smith   MPI_Request       *send_waits, *recv_waits, *send_waits2, *recv_waits2;
1001447629fSBarry Smith   MPI_Status         recv_status;
1011447629fSBarry Smith   PetscMPIInt        nindices, source, widx;
1021447629fSBarry Smith   PetscInt          *rbuf, *sbuf;
1031447629fSBarry Smith   MPI_Status        *send_status, *send_status2;
1041447629fSBarry Smith 
1051447629fSBarry Smith   PetscFunctionBegin;
1069566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)ao, &comm));
1079566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1089566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
1091447629fSBarry Smith 
1101447629fSBarry Smith   /*  first count number of contributors to each processor */
1119566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(size, &start));
1129566063dSJacob Faibussowitsch   PetscCall(PetscCalloc2(2 * size, &sizes, n, &owner));
1131447629fSBarry Smith 
1141447629fSBarry Smith   j       = 0;
1151447629fSBarry Smith   lastidx = -1;
1161447629fSBarry Smith   for (i = 0; i < n; i++) {
1176bd6ae52SBarry Smith     if (ia[i] < 0) owner[i] = -1;      /* mark negative entries (which are not to be mapped) with a special negative value */
1186bd6ae52SBarry Smith     if (ia[i] >= ao->N) owner[i] = -2; /* mark out of range entries with special negative value */
1196bd6ae52SBarry Smith     else {
1201447629fSBarry Smith       /* if indices are NOT locally sorted, need to start search at the beginning */
1211447629fSBarry Smith       if (lastidx > (idx = ia[i])) j = 0;
1221447629fSBarry Smith       lastidx = idx;
1231447629fSBarry Smith       for (; j < size; j++) {
1241447629fSBarry Smith         if (idx >= owners[j] && idx < owners[j + 1]) {
12576ec1555SBarry Smith           sizes[2 * j]++;       /* num of indices to be sent */
12676ec1555SBarry Smith           sizes[2 * j + 1] = 1; /* send to proc[j] */
1271447629fSBarry Smith           owner[i]         = j;
1281447629fSBarry Smith           break;
1291447629fSBarry Smith         }
1301447629fSBarry Smith       }
1311447629fSBarry Smith     }
1326bd6ae52SBarry Smith   }
13376ec1555SBarry Smith   sizes[2 * rank] = sizes[2 * rank + 1] = 0; /* do not receive from self! */
1341447629fSBarry Smith   nsends                                = 0;
13576ec1555SBarry Smith   for (i = 0; i < size; i++) nsends += sizes[2 * i + 1];
1361447629fSBarry Smith 
1371447629fSBarry Smith   /* inform other processors of number of messages and max length*/
1389566063dSJacob Faibussowitsch   PetscCall(PetscMaxSum(comm, sizes, &nmax, &nreceives));
1391447629fSBarry Smith 
1401447629fSBarry Smith   /* allocate arrays */
1419566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetNewTag((PetscObject)ao, &tag1));
1429566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetNewTag((PetscObject)ao, &tag2));
1431447629fSBarry Smith 
1449566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(nreceives * nmax, &rindices, nreceives, &recv_waits));
1459566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(nsends * nmax, &rindices2, nsends, &recv_waits2));
1461447629fSBarry Smith 
1479566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(n, &sindices, nsends, &send_waits, nsends, &send_status));
1489566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(n, &sindices2, nreceives, &send_waits2, nreceives, &send_status2));
1491447629fSBarry Smith 
1501447629fSBarry Smith   /* post 1st receives: receive others requests
1511447629fSBarry Smith      since we don't know how long each individual message is we
1521447629fSBarry Smith      allocate the largest needed buffer for each receive. Potentially
1531447629fSBarry Smith      this is a lot of wasted space.
1541447629fSBarry Smith   */
15548a46eb9SPierre Jolivet   for (i = 0, count = 0; i < nreceives; i++) PetscCallMPI(MPI_Irecv(rindices + nmax * i, nmax, MPIU_INT, MPI_ANY_SOURCE, tag1, comm, recv_waits + count++));
1561447629fSBarry Smith 
1571447629fSBarry Smith   /* do 1st sends:
1581447629fSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
1591447629fSBarry Smith          the ith processor
1601447629fSBarry Smith   */
1611447629fSBarry Smith   start[0] = 0;
16276ec1555SBarry Smith   for (i = 1; i < size; i++) start[i] = start[i - 1] + sizes[2 * i - 2];
1631447629fSBarry Smith   for (i = 0; i < n; i++) {
1641447629fSBarry Smith     j = owner[i];
1656bd6ae52SBarry Smith     if (j == -1) continue; /* do not remap negative entries in ia[] */
1669371c9d4SSatish Balay     else if (j == -2) { /* out of range entries get mapped to -1 */ ia[i] = -1;
1676bd6ae52SBarry Smith       continue;
1686bd6ae52SBarry Smith     } else if (j != rank) {
1691447629fSBarry Smith       sindices[start[j]++] = ia[i];
1701447629fSBarry Smith     } else { /* compute my own map */
1711447629fSBarry Smith       ia[i] = maploc[ia[i] - owners[rank]];
1721447629fSBarry Smith     }
1731447629fSBarry Smith   }
1741447629fSBarry Smith 
1751447629fSBarry Smith   start[0] = 0;
17676ec1555SBarry Smith   for (i = 1; i < size; i++) start[i] = start[i - 1] + sizes[2 * i - 2];
1771447629fSBarry Smith   for (i = 0, count = 0; i < size; i++) {
17876ec1555SBarry Smith     if (sizes[2 * i + 1]) {
1791447629fSBarry Smith       /* send my request to others */
1809566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(sindices + start[i], sizes[2 * i], MPIU_INT, i, tag1, comm, send_waits + count));
1811447629fSBarry Smith       /* post receive for the answer of my request */
1829566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Irecv(sindices2 + start[i], sizes[2 * i], MPIU_INT, i, tag2, comm, recv_waits2 + count));
1831447629fSBarry Smith       count++;
1841447629fSBarry Smith     }
1851447629fSBarry Smith   }
18608401ef6SPierre Jolivet   PetscCheck(nsends == count, comm, PETSC_ERR_SUP, "nsends %" PetscInt_FMT " != count %" PetscInt_FMT, nsends, count);
1871447629fSBarry Smith 
1881447629fSBarry Smith   /* wait on 1st sends */
18948a46eb9SPierre Jolivet   if (nsends) PetscCallMPI(MPI_Waitall(nsends, send_waits, send_status));
1901447629fSBarry Smith 
1911447629fSBarry Smith   /* 1st recvs: other's requests */
1921447629fSBarry Smith   for (j = 0; j < nreceives; j++) {
1939566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Waitany(nreceives, recv_waits, &widx, &recv_status)); /* idx: index of handle for operation that completed */
1949566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, &nindices));
1951447629fSBarry Smith     rbuf   = rindices + nmax * widx; /* global index */
1961447629fSBarry Smith     source = recv_status.MPI_SOURCE;
1971447629fSBarry Smith 
1981447629fSBarry Smith     /* compute mapping */
1991447629fSBarry Smith     sbuf = rbuf;
2001447629fSBarry Smith     for (i = 0; i < nindices; i++) sbuf[i] = maploc[rbuf[i] - owners[rank]];
2011447629fSBarry Smith 
2021447629fSBarry Smith     /* send mapping back to the sender */
2039566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Isend(sbuf, nindices, MPIU_INT, source, tag2, comm, send_waits2 + widx));
2041447629fSBarry Smith   }
2051447629fSBarry Smith 
2061447629fSBarry Smith   /* wait on 2nd sends */
20748a46eb9SPierre Jolivet   if (nreceives) PetscCallMPI(MPI_Waitall(nreceives, send_waits2, send_status2));
2081447629fSBarry Smith 
2091447629fSBarry Smith   /* 2nd recvs: for the answer of my request */
2101447629fSBarry Smith   for (j = 0; j < nsends; j++) {
2119566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Waitany(nsends, recv_waits2, &widx, &recv_status));
2129566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, &nindices));
2131447629fSBarry Smith     source = recv_status.MPI_SOURCE;
2141447629fSBarry Smith     /* pack output ia[] */
2151447629fSBarry Smith     rbuf  = sindices2 + start[source];
2161447629fSBarry Smith     count = 0;
2171447629fSBarry Smith     for (i = 0; i < n; i++) {
2181447629fSBarry Smith       if (source == owner[i]) ia[i] = rbuf[count++];
2191447629fSBarry Smith     }
2201447629fSBarry Smith   }
2211447629fSBarry Smith 
2221447629fSBarry Smith   /* free arrays */
2239566063dSJacob Faibussowitsch   PetscCall(PetscFree(start));
2249566063dSJacob Faibussowitsch   PetscCall(PetscFree2(sizes, owner));
2259566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rindices, recv_waits));
2269566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rindices2, recv_waits2));
2279566063dSJacob Faibussowitsch   PetscCall(PetscFree3(sindices, send_waits, send_status));
2289566063dSJacob Faibussowitsch   PetscCall(PetscFree3(sindices2, send_waits2, send_status2));
2291447629fSBarry Smith   PetscFunctionReturn(0);
2301447629fSBarry Smith }
2311447629fSBarry Smith 
232d71ae5a4SJacob Faibussowitsch PetscErrorCode AOPetscToApplication_MemoryScalable(AO ao, PetscInt n, PetscInt *ia)
233d71ae5a4SJacob Faibussowitsch {
2341447629fSBarry Smith   AO_MemoryScalable *aomems  = (AO_MemoryScalable *)ao->data;
2351447629fSBarry Smith   PetscInt          *app_loc = aomems->app_loc;
2361447629fSBarry Smith 
2371447629fSBarry Smith   PetscFunctionBegin;
2389566063dSJacob Faibussowitsch   PetscCall(AOMap_MemoryScalable_private(ao, n, ia, app_loc));
2391447629fSBarry Smith   PetscFunctionReturn(0);
2401447629fSBarry Smith }
2411447629fSBarry Smith 
242d71ae5a4SJacob Faibussowitsch PetscErrorCode AOApplicationToPetsc_MemoryScalable(AO ao, PetscInt n, PetscInt *ia)
243d71ae5a4SJacob Faibussowitsch {
2441447629fSBarry Smith   AO_MemoryScalable *aomems    = (AO_MemoryScalable *)ao->data;
2451447629fSBarry Smith   PetscInt          *petsc_loc = aomems->petsc_loc;
2461447629fSBarry Smith 
2471447629fSBarry Smith   PetscFunctionBegin;
2489566063dSJacob Faibussowitsch   PetscCall(AOMap_MemoryScalable_private(ao, n, ia, petsc_loc));
2491447629fSBarry Smith   PetscFunctionReturn(0);
2501447629fSBarry Smith }
2511447629fSBarry Smith 
2521447629fSBarry Smith static struct _AOOps AOOps_MemoryScalable = {
253267267bdSJacob Faibussowitsch   PetscDesignatedInitializer(view, AOView_MemoryScalable),
254267267bdSJacob Faibussowitsch   PetscDesignatedInitializer(destroy, AODestroy_MemoryScalable),
255267267bdSJacob Faibussowitsch   PetscDesignatedInitializer(petsctoapplication, AOPetscToApplication_MemoryScalable),
256267267bdSJacob Faibussowitsch   PetscDesignatedInitializer(applicationtopetsc, AOApplicationToPetsc_MemoryScalable),
2571447629fSBarry Smith };
2581447629fSBarry Smith 
259d71ae5a4SJacob Faibussowitsch PetscErrorCode AOCreateMemoryScalable_private(MPI_Comm comm, PetscInt napp, const PetscInt from_array[], const PetscInt to_array[], AO ao, PetscInt *aomap_loc)
260d71ae5a4SJacob Faibussowitsch {
2611447629fSBarry Smith   AO_MemoryScalable *aomems  = (AO_MemoryScalable *)ao->data;
2621447629fSBarry Smith   PetscLayout        map     = aomems->map;
2631447629fSBarry Smith   PetscInt           n_local = map->n, i, j;
2641447629fSBarry Smith   PetscMPIInt        rank, size, tag;
26576ec1555SBarry Smith   PetscInt          *owner, *start, *sizes, nsends, nreceives;
2661447629fSBarry Smith   PetscInt           nmax, count, *sindices, *rindices, idx, lastidx;
2671447629fSBarry Smith   PetscInt          *owners = aomems->map->range;
2681447629fSBarry Smith   MPI_Request       *send_waits, *recv_waits;
2691447629fSBarry Smith   MPI_Status         recv_status;
2701447629fSBarry Smith   PetscMPIInt        nindices, widx;
2711447629fSBarry Smith   PetscInt          *rbuf;
2721447629fSBarry Smith   PetscInt           n = napp, ip, ia;
2731447629fSBarry Smith   MPI_Status        *send_status;
2741447629fSBarry Smith 
2751447629fSBarry Smith   PetscFunctionBegin;
2769566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(aomap_loc, n_local));
2771447629fSBarry Smith 
2789566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
2799566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
2801447629fSBarry Smith 
2811447629fSBarry Smith   /*  first count number of contributors (of from_array[]) to each processor */
2829566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(2 * size, &sizes));
2839566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n, &owner));
2841447629fSBarry Smith 
2851447629fSBarry Smith   j       = 0;
2861447629fSBarry Smith   lastidx = -1;
2871447629fSBarry Smith   for (i = 0; i < n; i++) {
2881447629fSBarry Smith     /* if indices are NOT locally sorted, need to start search at the beginning */
2891447629fSBarry Smith     if (lastidx > (idx = from_array[i])) j = 0;
2901447629fSBarry Smith     lastidx = idx;
2911447629fSBarry Smith     for (; j < size; j++) {
2921447629fSBarry Smith       if (idx >= owners[j] && idx < owners[j + 1]) {
29376ec1555SBarry Smith         sizes[2 * j] += 2;    /* num of indices to be sent - in pairs (ip,ia) */
29476ec1555SBarry Smith         sizes[2 * j + 1] = 1; /* send to proc[j] */
2951447629fSBarry Smith         owner[i]         = j;
2961447629fSBarry Smith         break;
2971447629fSBarry Smith       }
2981447629fSBarry Smith     }
2991447629fSBarry Smith   }
30076ec1555SBarry Smith   sizes[2 * rank] = sizes[2 * rank + 1] = 0; /* do not receive from self! */
3011447629fSBarry Smith   nsends                                = 0;
30276ec1555SBarry Smith   for (i = 0; i < size; i++) nsends += sizes[2 * i + 1];
3031447629fSBarry Smith 
3041447629fSBarry Smith   /* inform other processors of number of messages and max length*/
3059566063dSJacob Faibussowitsch   PetscCall(PetscMaxSum(comm, sizes, &nmax, &nreceives));
3061447629fSBarry Smith 
3071447629fSBarry Smith   /* allocate arrays */
3089566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetNewTag((PetscObject)ao, &tag));
3099566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(nreceives * nmax, &rindices, nreceives, &recv_waits));
3109566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(2 * n, &sindices, nsends, &send_waits, nsends, &send_status));
3119566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(size, &start));
3121447629fSBarry Smith 
3131447629fSBarry Smith   /* post receives: */
31448a46eb9SPierre Jolivet   for (i = 0; i < nreceives; i++) PetscCallMPI(MPI_Irecv(rindices + nmax * i, nmax, MPIU_INT, MPI_ANY_SOURCE, tag, comm, recv_waits + i));
3151447629fSBarry Smith 
3161447629fSBarry Smith   /* do sends:
3171447629fSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3181447629fSBarry Smith          the ith processor
3191447629fSBarry Smith   */
3201447629fSBarry Smith   start[0] = 0;
32176ec1555SBarry Smith   for (i = 1; i < size; i++) start[i] = start[i - 1] + sizes[2 * i - 2];
3221447629fSBarry Smith   for (i = 0; i < n; i++) {
3231447629fSBarry Smith     j = owner[i];
3241447629fSBarry Smith     if (j != rank) {
3251447629fSBarry Smith       ip                   = from_array[i];
3261447629fSBarry Smith       ia                   = to_array[i];
3271447629fSBarry Smith       sindices[start[j]++] = ip;
3281447629fSBarry Smith       sindices[start[j]++] = ia;
3291447629fSBarry Smith     } else { /* compute my own map */
3301447629fSBarry Smith       ip            = from_array[i] - owners[rank];
3311447629fSBarry Smith       ia            = to_array[i];
3321447629fSBarry Smith       aomap_loc[ip] = ia;
3331447629fSBarry Smith     }
3341447629fSBarry Smith   }
3351447629fSBarry Smith 
3361447629fSBarry Smith   start[0] = 0;
33776ec1555SBarry Smith   for (i = 1; i < size; i++) start[i] = start[i - 1] + sizes[2 * i - 2];
3381447629fSBarry Smith   for (i = 0, count = 0; i < size; i++) {
33976ec1555SBarry Smith     if (sizes[2 * i + 1]) {
3409566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(sindices + start[i], sizes[2 * i], MPIU_INT, i, tag, comm, send_waits + count));
3411447629fSBarry Smith       count++;
3421447629fSBarry Smith     }
3431447629fSBarry Smith   }
34408401ef6SPierre Jolivet   PetscCheck(nsends == count, comm, PETSC_ERR_SUP, "nsends %" PetscInt_FMT " != count %" PetscInt_FMT, nsends, count);
3451447629fSBarry Smith 
3461447629fSBarry Smith   /* wait on sends */
34748a46eb9SPierre Jolivet   if (nsends) PetscCallMPI(MPI_Waitall(nsends, send_waits, send_status));
3481447629fSBarry Smith 
3491447629fSBarry Smith   /* recvs */
3501447629fSBarry Smith   count = 0;
3511447629fSBarry Smith   for (j = nreceives; j > 0; j--) {
3529566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Waitany(nreceives, recv_waits, &widx, &recv_status));
3539566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, &nindices));
3541447629fSBarry Smith     rbuf = rindices + nmax * widx; /* global index */
3551447629fSBarry Smith 
3561447629fSBarry Smith     /* compute local mapping */
3571447629fSBarry Smith     for (i = 0; i < nindices; i += 2) {       /* pack aomap_loc */
3581447629fSBarry Smith       ip            = rbuf[i] - owners[rank]; /* local index */
3591447629fSBarry Smith       ia            = rbuf[i + 1];
3601447629fSBarry Smith       aomap_loc[ip] = ia;
3611447629fSBarry Smith     }
3621447629fSBarry Smith     count++;
3631447629fSBarry Smith   }
3641447629fSBarry Smith 
3659566063dSJacob Faibussowitsch   PetscCall(PetscFree(start));
3669566063dSJacob Faibussowitsch   PetscCall(PetscFree3(sindices, send_waits, send_status));
3679566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rindices, recv_waits));
3689566063dSJacob Faibussowitsch   PetscCall(PetscFree(owner));
3699566063dSJacob Faibussowitsch   PetscCall(PetscFree(sizes));
3701447629fSBarry Smith   PetscFunctionReturn(0);
3711447629fSBarry Smith }
3721447629fSBarry Smith 
373d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode AOCreate_MemoryScalable(AO ao)
374d71ae5a4SJacob Faibussowitsch {
3751447629fSBarry Smith   IS                 isapp = ao->isapp, ispetsc = ao->ispetsc;
3761447629fSBarry Smith   const PetscInt    *mypetsc, *myapp;
3771447629fSBarry Smith   PetscInt           napp, n_local, N, i, start, *petsc, *lens, *disp;
3781447629fSBarry Smith   MPI_Comm           comm;
3791447629fSBarry Smith   AO_MemoryScalable *aomems;
3801447629fSBarry Smith   PetscLayout        map;
3811447629fSBarry Smith   PetscMPIInt        size, rank;
3821447629fSBarry Smith 
3831447629fSBarry Smith   PetscFunctionBegin;
38428b400f6SJacob Faibussowitsch   PetscCheck(isapp, PetscObjectComm((PetscObject)ao), PETSC_ERR_ARG_WRONGSTATE, "AOSetIS() must be called before AOSetType()");
3851447629fSBarry Smith   /* create special struct aomems */
3864dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&aomems));
3871447629fSBarry Smith   ao->data = (void *)aomems;
3889566063dSJacob Faibussowitsch   PetscCall(PetscMemcpy(ao->ops, &AOOps_MemoryScalable, sizeof(struct _AOOps)));
3899566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)ao, AOMEMORYSCALABLE));
3901447629fSBarry Smith 
3911447629fSBarry Smith   /* transmit all local lengths of isapp to all processors */
3929566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)isapp, &comm));
3939566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
3949566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3959566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(size, &lens, size, &disp));
3969566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(isapp, &napp));
3979566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Allgather(&napp, 1, MPIU_INT, lens, 1, MPIU_INT, comm));
3981447629fSBarry Smith 
3991447629fSBarry Smith   N = 0;
4001447629fSBarry Smith   for (i = 0; i < size; i++) {
4011447629fSBarry Smith     disp[i] = N;
4021447629fSBarry Smith     N += lens[i];
4031447629fSBarry Smith   }
4041447629fSBarry Smith 
4051447629fSBarry Smith   /* If ispetsc is 0 then use "natural" numbering */
4061447629fSBarry Smith   if (napp) {
4071447629fSBarry Smith     if (!ispetsc) {
4081447629fSBarry Smith       start = disp[rank];
4099566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(napp + 1, &petsc));
4101447629fSBarry Smith       for (i = 0; i < napp; i++) petsc[i] = start + i;
4111447629fSBarry Smith     } else {
4129566063dSJacob Faibussowitsch       PetscCall(ISGetIndices(ispetsc, &mypetsc));
4131447629fSBarry Smith       petsc = (PetscInt *)mypetsc;
4141447629fSBarry Smith     }
4154a2f8832SBarry Smith   } else {
4164a2f8832SBarry Smith     petsc = NULL;
4171447629fSBarry Smith   }
4181447629fSBarry Smith 
4191447629fSBarry Smith   /* create a map with global size N - used to determine the local sizes of ao - shall we use local napp instead of N? */
4209566063dSJacob Faibussowitsch   PetscCall(PetscLayoutCreate(comm, &map));
4211447629fSBarry Smith   map->bs = 1;
4221447629fSBarry Smith   map->N  = N;
4239566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(map));
4241447629fSBarry Smith 
4251447629fSBarry Smith   ao->N       = N;
4261447629fSBarry Smith   ao->n       = map->n;
4271447629fSBarry Smith   aomems->map = map;
4281447629fSBarry Smith 
4291447629fSBarry Smith   /* create distributed indices app_loc: petsc->app and petsc_loc: app->petsc */
4301447629fSBarry Smith   n_local = map->n;
4319566063dSJacob Faibussowitsch   PetscCall(PetscCalloc2(n_local, &aomems->app_loc, n_local, &aomems->petsc_loc));
4329566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isapp, &myapp));
4331447629fSBarry Smith 
4349566063dSJacob Faibussowitsch   PetscCall(AOCreateMemoryScalable_private(comm, napp, petsc, myapp, ao, aomems->app_loc));
4359566063dSJacob Faibussowitsch   PetscCall(AOCreateMemoryScalable_private(comm, napp, myapp, petsc, ao, aomems->petsc_loc));
4361447629fSBarry Smith 
4379566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isapp, &myapp));
4381447629fSBarry Smith   if (napp) {
4391447629fSBarry Smith     if (ispetsc) {
4409566063dSJacob Faibussowitsch       PetscCall(ISRestoreIndices(ispetsc, &mypetsc));
4411447629fSBarry Smith     } else {
4429566063dSJacob Faibussowitsch       PetscCall(PetscFree(petsc));
4431447629fSBarry Smith     }
4441447629fSBarry Smith   }
4459566063dSJacob Faibussowitsch   PetscCall(PetscFree2(lens, disp));
4461447629fSBarry Smith   PetscFunctionReturn(0);
4471447629fSBarry Smith }
4481447629fSBarry Smith 
4491447629fSBarry Smith /*@C
4501447629fSBarry Smith    AOCreateMemoryScalable - Creates a memory scalable application ordering using two integer arrays.
4511447629fSBarry Smith 
452d083f849SBarry Smith    Collective
4531447629fSBarry Smith 
4541447629fSBarry Smith    Input Parameters:
455cab54364SBarry Smith +  comm - MPI communicator that is to share the `AO`
4561447629fSBarry Smith .  napp - size of integer arrays
4571447629fSBarry Smith .  myapp - integer array that defines an ordering
4581447629fSBarry Smith -  mypetsc - integer array that defines another ordering (may be NULL to
4591447629fSBarry Smith              indicate the natural ordering, that is 0,1,2,3,...)
4601447629fSBarry Smith 
4611447629fSBarry Smith    Output Parameter:
4621447629fSBarry Smith .  aoout - the new application ordering
4631447629fSBarry Smith 
4641447629fSBarry Smith    Level: beginner
4651447629fSBarry Smith 
466cab54364SBarry Smith     Note:
46795452b02SPatrick Sanan     The arrays myapp and mypetsc must contain the all the integers 0 to napp-1 with no duplicates; that is there cannot be any "holes"
468cab54364SBarry Smith     in the indices. Use `AOCreateMapping()` or `AOCreateMappingIS()` if you wish to have "holes" in the indices.
469cab54364SBarry Smith     Comparing with `AOCreateBasic()`, this routine trades memory with message communication.
4701447629fSBarry Smith 
471cab54364SBarry Smith .seealso: [](sec_ao), [](sec_scatter), `AO`, `AOCreateMemoryScalableIS()`, `AODestroy()`, `AOPetscToApplication()`, `AOApplicationToPetsc()`
4721447629fSBarry Smith @*/
473d71ae5a4SJacob Faibussowitsch PetscErrorCode AOCreateMemoryScalable(MPI_Comm comm, PetscInt napp, const PetscInt myapp[], const PetscInt mypetsc[], AO *aoout)
474d71ae5a4SJacob Faibussowitsch {
4751447629fSBarry Smith   IS              isapp, ispetsc;
4761447629fSBarry Smith   const PetscInt *app = myapp, *petsc = mypetsc;
4771447629fSBarry Smith 
4781447629fSBarry Smith   PetscFunctionBegin;
4799566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(comm, napp, app, PETSC_USE_POINTER, &isapp));
4801447629fSBarry Smith   if (mypetsc) {
4819566063dSJacob Faibussowitsch     PetscCall(ISCreateGeneral(comm, napp, petsc, PETSC_USE_POINTER, &ispetsc));
4821447629fSBarry Smith   } else {
4831447629fSBarry Smith     ispetsc = NULL;
4841447629fSBarry Smith   }
4859566063dSJacob Faibussowitsch   PetscCall(AOCreateMemoryScalableIS(isapp, ispetsc, aoout));
4869566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&isapp));
48748a46eb9SPierre Jolivet   if (mypetsc) PetscCall(ISDestroy(&ispetsc));
4881447629fSBarry Smith   PetscFunctionReturn(0);
4891447629fSBarry Smith }
4901447629fSBarry Smith 
4911447629fSBarry Smith /*@C
4921447629fSBarry Smith    AOCreateMemoryScalableIS - Creates a memory scalable application ordering using two index sets.
4931447629fSBarry Smith 
494*c3339decSBarry Smith    Collective
4951447629fSBarry Smith 
4961447629fSBarry Smith    Input Parameters:
4971447629fSBarry Smith +  isapp - index set that defines an ordering
4981447629fSBarry Smith -  ispetsc - index set that defines another ordering (may be NULL to use the
4991447629fSBarry Smith              natural ordering)
5001447629fSBarry Smith 
5011447629fSBarry Smith    Output Parameter:
5021447629fSBarry Smith .  aoout - the new application ordering
5031447629fSBarry Smith 
5041447629fSBarry Smith    Level: beginner
5051447629fSBarry Smith 
50695452b02SPatrick Sanan     Notes:
50795452b02SPatrick Sanan     The index sets isapp and ispetsc must contain the all the integers 0 to napp-1 (where napp is the length of the index sets) with no duplicates;
5081447629fSBarry Smith     that is there cannot be any "holes".
509cab54364SBarry Smith 
510cab54364SBarry Smith     Comparing with `AOCreateBasicIS()`, this routine trades memory with message communication.
511cab54364SBarry Smith 
512cab54364SBarry Smith .seealso: [](sec_ao), [](sec_scatter), `AO`, `AOCreateBasicIS()`, `AOCreateMemoryScalable()`, `AODestroy()`
5131447629fSBarry Smith @*/
514d71ae5a4SJacob Faibussowitsch PetscErrorCode AOCreateMemoryScalableIS(IS isapp, IS ispetsc, AO *aoout)
515d71ae5a4SJacob Faibussowitsch {
5161447629fSBarry Smith   MPI_Comm comm;
5171447629fSBarry Smith   AO       ao;
5181447629fSBarry Smith 
5191447629fSBarry Smith   PetscFunctionBegin;
5209566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)isapp, &comm));
5219566063dSJacob Faibussowitsch   PetscCall(AOCreate(comm, &ao));
5229566063dSJacob Faibussowitsch   PetscCall(AOSetIS(ao, isapp, ispetsc));
5239566063dSJacob Faibussowitsch   PetscCall(AOSetType(ao, AOMEMORYSCALABLE));
5249566063dSJacob Faibussowitsch   PetscCall(AOViewFromOptions(ao, NULL, "-ao_view"));
5251447629fSBarry Smith   *aoout = ao;
5261447629fSBarry Smith   PetscFunctionReturn(0);
5271447629fSBarry Smith }
528