xref: /petsc/src/vec/is/ao/impls/memscalable/aomemscalable.c (revision b6971eaeabe91bae028772bab3157cdc0eb03a76)
11447629fSBarry Smith /*
21447629fSBarry Smith     The memory scalable AO application ordering routines. These store the
36bd6ae52SBarry Smith   orderings on each processor for that processor's range of values
41447629fSBarry Smith */
51447629fSBarry Smith 
61447629fSBarry Smith #include <../src/vec/is/ao/aoimpl.h> /*I  "petscao.h"   I*/
71447629fSBarry Smith 
81447629fSBarry Smith typedef struct {
91447629fSBarry Smith   PetscInt   *app_loc;   /* app_loc[i] is the partner for the ith local PETSc slot */
101447629fSBarry Smith   PetscInt   *petsc_loc; /* petsc_loc[j] is the partner for the jth local app slot */
111447629fSBarry Smith   PetscLayout map;       /* determines the local sizes of ao */
121447629fSBarry Smith } AO_MemoryScalable;
131447629fSBarry Smith 
141447629fSBarry Smith /*
156bd6ae52SBarry Smith        All processors ship the data to process 0 to be printed; note that this is not scalable because
166bd6ae52SBarry Smith        process 0 allocates space for all the orderings entry across all the processes
171447629fSBarry Smith */
18da8c939bSJacob Faibussowitsch static PetscErrorCode AOView_MemoryScalable(AO ao, PetscViewer viewer)
19d71ae5a4SJacob Faibussowitsch {
201447629fSBarry Smith   PetscMPIInt        rank, size;
211447629fSBarry Smith   AO_MemoryScalable *aomems = (AO_MemoryScalable *)ao->data;
221447629fSBarry Smith   PetscBool          iascii;
231447629fSBarry Smith   PetscMPIInt        tag_app, tag_petsc;
241447629fSBarry Smith   PetscLayout        map = aomems->map;
251447629fSBarry Smith   PetscInt          *app, *app_loc, *petsc, *petsc_loc, len, i, j;
261447629fSBarry Smith   MPI_Status         status;
271447629fSBarry Smith 
281447629fSBarry Smith   PetscFunctionBegin;
299566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
3028b400f6SJacob Faibussowitsch   PetscCheck(iascii, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not supported for AO MemoryScalable", ((PetscObject)viewer)->type_name);
311447629fSBarry Smith 
329566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)ao), &rank));
339566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)ao), &size));
341447629fSBarry Smith 
359566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetNewTag((PetscObject)ao, &tag_app));
369566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetNewTag((PetscObject)ao, &tag_petsc));
371447629fSBarry Smith 
38dd400576SPatrick Sanan   if (rank == 0) {
399566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "Number of elements in ordering %" PetscInt_FMT "\n", ao->N));
409566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "PETSc->App  App->PETSc\n"));
411447629fSBarry Smith 
429566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(map->N, &app, map->N, &petsc));
431447629fSBarry Smith     len = map->n;
441447629fSBarry Smith     /* print local AO */
459566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "Process [%d]\n", rank));
4648a46eb9SPierre Jolivet     for (i = 0; i < len; i++) PetscCall(PetscViewerASCIIPrintf(viewer, "%3" PetscInt_FMT "  %3" PetscInt_FMT "    %3" PetscInt_FMT "  %3" PetscInt_FMT "\n", i, aomems->app_loc[i], i, aomems->petsc_loc[i]));
471447629fSBarry Smith 
481447629fSBarry Smith     /* recv and print off-processor's AO */
491447629fSBarry Smith     for (i = 1; i < size; i++) {
501447629fSBarry Smith       len       = map->range[i + 1] - map->range[i];
511447629fSBarry Smith       app_loc   = app + map->range[i];
521447629fSBarry Smith       petsc_loc = petsc + map->range[i];
539566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Recv(app_loc, (PetscMPIInt)len, MPIU_INT, i, tag_app, PetscObjectComm((PetscObject)ao), &status));
549566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Recv(petsc_loc, (PetscMPIInt)len, MPIU_INT, i, tag_petsc, PetscObjectComm((PetscObject)ao), &status));
559566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "Process [%" PetscInt_FMT "]\n", i));
5648a46eb9SPierre Jolivet       for (j = 0; j < len; j++) PetscCall(PetscViewerASCIIPrintf(viewer, "%3" PetscInt_FMT "  %3" PetscInt_FMT "    %3" PetscInt_FMT "  %3" PetscInt_FMT "\n", map->range[i] + j, app_loc[j], map->range[i] + j, petsc_loc[j]));
571447629fSBarry Smith     }
589566063dSJacob Faibussowitsch     PetscCall(PetscFree2(app, petsc));
591447629fSBarry Smith 
601447629fSBarry Smith   } else {
611447629fSBarry Smith     /* send values */
629566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Send((void *)aomems->app_loc, map->n, MPIU_INT, 0, tag_app, PetscObjectComm((PetscObject)ao)));
639566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Send((void *)aomems->petsc_loc, map->n, MPIU_INT, 0, tag_petsc, PetscObjectComm((PetscObject)ao)));
641447629fSBarry Smith   }
659566063dSJacob Faibussowitsch   PetscCall(PetscViewerFlush(viewer));
663ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
671447629fSBarry Smith }
681447629fSBarry Smith 
69da8c939bSJacob Faibussowitsch static PetscErrorCode AODestroy_MemoryScalable(AO ao)
70d71ae5a4SJacob Faibussowitsch {
711447629fSBarry Smith   AO_MemoryScalable *aomems = (AO_MemoryScalable *)ao->data;
721447629fSBarry Smith 
731447629fSBarry Smith   PetscFunctionBegin;
749566063dSJacob Faibussowitsch   PetscCall(PetscFree2(aomems->app_loc, aomems->petsc_loc));
759566063dSJacob Faibussowitsch   PetscCall(PetscLayoutDestroy(&aomems->map));
769566063dSJacob Faibussowitsch   PetscCall(PetscFree(aomems));
773ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
781447629fSBarry Smith }
791447629fSBarry Smith 
801447629fSBarry Smith /*
811447629fSBarry Smith    Input Parameters:
821447629fSBarry Smith +   ao - the application ordering context
831447629fSBarry Smith .   n  - the number of integers in ia[]
841447629fSBarry Smith .   ia - the integers; these are replaced with their mapped value
851447629fSBarry Smith -   maploc - app_loc or petsc_loc in struct "AO_MemoryScalable"
861447629fSBarry Smith 
871447629fSBarry Smith    Output Parameter:
881447629fSBarry Smith .   ia - the mapped interges
891447629fSBarry Smith  */
90da8c939bSJacob Faibussowitsch static PetscErrorCode AOMap_MemoryScalable_private(AO ao, PetscInt n, PetscInt *ia, const PetscInt *maploc)
91d71ae5a4SJacob Faibussowitsch {
921447629fSBarry Smith   AO_MemoryScalable *aomems = (AO_MemoryScalable *)ao->data;
931447629fSBarry Smith   MPI_Comm           comm;
941447629fSBarry Smith   PetscMPIInt        rank, size, tag1, tag2;
9576ec1555SBarry Smith   PetscInt          *owner, *start, *sizes, nsends, nreceives;
961447629fSBarry Smith   PetscInt           nmax, count, *sindices, *rindices, i, j, idx, lastidx, *sindices2, *rindices2;
976bd6ae52SBarry Smith   const PetscInt    *owners = aomems->map->range;
981447629fSBarry Smith   MPI_Request       *send_waits, *recv_waits, *send_waits2, *recv_waits2;
991447629fSBarry Smith   MPI_Status         recv_status;
1001447629fSBarry Smith   PetscMPIInt        nindices, source, widx;
1011447629fSBarry Smith   PetscInt          *rbuf, *sbuf;
1021447629fSBarry Smith   MPI_Status        *send_status, *send_status2;
1031447629fSBarry Smith 
1041447629fSBarry Smith   PetscFunctionBegin;
1059566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)ao, &comm));
1069566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1079566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
1081447629fSBarry Smith 
1091447629fSBarry Smith   /*  first count number of contributors to each processor */
1109566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(size, &start));
1119566063dSJacob Faibussowitsch   PetscCall(PetscCalloc2(2 * size, &sizes, n, &owner));
1121447629fSBarry Smith 
1131447629fSBarry Smith   j       = 0;
1141447629fSBarry Smith   lastidx = -1;
1151447629fSBarry Smith   for (i = 0; i < n; i++) {
1166bd6ae52SBarry Smith     if (ia[i] < 0) owner[i] = -1;      /* mark negative entries (which are not to be mapped) with a special negative value */
1176bd6ae52SBarry Smith     if (ia[i] >= ao->N) owner[i] = -2; /* mark out of range entries with special negative value */
1186bd6ae52SBarry Smith     else {
1191447629fSBarry Smith       /* if indices are NOT locally sorted, need to start search at the beginning */
1201447629fSBarry Smith       if (lastidx > (idx = ia[i])) j = 0;
1211447629fSBarry Smith       lastidx = idx;
1221447629fSBarry Smith       for (; j < size; j++) {
1231447629fSBarry Smith         if (idx >= owners[j] && idx < owners[j + 1]) {
12476ec1555SBarry Smith           sizes[2 * j]++;       /* num of indices to be sent */
12576ec1555SBarry Smith           sizes[2 * j + 1] = 1; /* send to proc[j] */
1261447629fSBarry Smith           owner[i]         = j;
1271447629fSBarry Smith           break;
1281447629fSBarry Smith         }
1291447629fSBarry Smith       }
1301447629fSBarry Smith     }
1316bd6ae52SBarry Smith   }
13276ec1555SBarry Smith   sizes[2 * rank] = sizes[2 * rank + 1] = 0; /* do not receive from self! */
1331447629fSBarry Smith   nsends                                = 0;
13476ec1555SBarry Smith   for (i = 0; i < size; i++) nsends += sizes[2 * i + 1];
1351447629fSBarry Smith 
1361447629fSBarry Smith   /* inform other processors of number of messages and max length*/
1379566063dSJacob Faibussowitsch   PetscCall(PetscMaxSum(comm, sizes, &nmax, &nreceives));
1381447629fSBarry Smith 
1391447629fSBarry Smith   /* allocate arrays */
1409566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetNewTag((PetscObject)ao, &tag1));
1419566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetNewTag((PetscObject)ao, &tag2));
1421447629fSBarry Smith 
1439566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(nreceives * nmax, &rindices, nreceives, &recv_waits));
1449566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(nsends * nmax, &rindices2, nsends, &recv_waits2));
1451447629fSBarry Smith 
1469566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(n, &sindices, nsends, &send_waits, nsends, &send_status));
1479566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(n, &sindices2, nreceives, &send_waits2, nreceives, &send_status2));
1481447629fSBarry Smith 
1491447629fSBarry Smith   /* post 1st receives: receive others requests
1501447629fSBarry Smith      since we don't know how long each individual message is we
1511447629fSBarry Smith      allocate the largest needed buffer for each receive. Potentially
1521447629fSBarry Smith      this is a lot of wasted space.
1531447629fSBarry Smith   */
15448a46eb9SPierre Jolivet   for (i = 0, count = 0; i < nreceives; i++) PetscCallMPI(MPI_Irecv(rindices + nmax * i, nmax, MPIU_INT, MPI_ANY_SOURCE, tag1, comm, recv_waits + count++));
1551447629fSBarry Smith 
1561447629fSBarry Smith   /* do 1st sends:
1571447629fSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
1581447629fSBarry Smith          the ith processor
1591447629fSBarry Smith   */
1601447629fSBarry Smith   start[0] = 0;
16176ec1555SBarry Smith   for (i = 1; i < size; i++) start[i] = start[i - 1] + sizes[2 * i - 2];
1621447629fSBarry Smith   for (i = 0; i < n; i++) {
1631447629fSBarry Smith     j = owner[i];
1646bd6ae52SBarry Smith     if (j == -1) continue; /* do not remap negative entries in ia[] */
1659371c9d4SSatish Balay     else if (j == -2) { /* out of range entries get mapped to -1 */ ia[i] = -1;
1666bd6ae52SBarry Smith       continue;
1676bd6ae52SBarry Smith     } else if (j != rank) {
1681447629fSBarry Smith       sindices[start[j]++] = ia[i];
1691447629fSBarry Smith     } else { /* compute my own map */
1701447629fSBarry Smith       ia[i] = maploc[ia[i] - owners[rank]];
1711447629fSBarry Smith     }
1721447629fSBarry Smith   }
1731447629fSBarry Smith 
1741447629fSBarry Smith   start[0] = 0;
17576ec1555SBarry Smith   for (i = 1; i < size; i++) start[i] = start[i - 1] + sizes[2 * i - 2];
1761447629fSBarry Smith   for (i = 0, count = 0; i < size; i++) {
17776ec1555SBarry Smith     if (sizes[2 * i + 1]) {
1781447629fSBarry Smith       /* send my request to others */
1799566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(sindices + start[i], sizes[2 * i], MPIU_INT, i, tag1, comm, send_waits + count));
1801447629fSBarry Smith       /* post receive for the answer of my request */
1819566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Irecv(sindices2 + start[i], sizes[2 * i], MPIU_INT, i, tag2, comm, recv_waits2 + count));
1821447629fSBarry Smith       count++;
1831447629fSBarry Smith     }
1841447629fSBarry Smith   }
18508401ef6SPierre Jolivet   PetscCheck(nsends == count, comm, PETSC_ERR_SUP, "nsends %" PetscInt_FMT " != count %" PetscInt_FMT, nsends, count);
1861447629fSBarry Smith 
1871447629fSBarry Smith   /* wait on 1st sends */
18848a46eb9SPierre Jolivet   if (nsends) PetscCallMPI(MPI_Waitall(nsends, send_waits, send_status));
1891447629fSBarry Smith 
1901447629fSBarry Smith   /* 1st recvs: other's requests */
1911447629fSBarry Smith   for (j = 0; j < nreceives; j++) {
1929566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Waitany(nreceives, recv_waits, &widx, &recv_status)); /* idx: index of handle for operation that completed */
1939566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, &nindices));
1941447629fSBarry Smith     rbuf   = rindices + nmax * widx; /* global index */
1951447629fSBarry Smith     source = recv_status.MPI_SOURCE;
1961447629fSBarry Smith 
1971447629fSBarry Smith     /* compute mapping */
1981447629fSBarry Smith     sbuf = rbuf;
1991447629fSBarry Smith     for (i = 0; i < nindices; i++) sbuf[i] = maploc[rbuf[i] - owners[rank]];
2001447629fSBarry Smith 
2011447629fSBarry Smith     /* send mapping back to the sender */
2029566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Isend(sbuf, nindices, MPIU_INT, source, tag2, comm, send_waits2 + widx));
2031447629fSBarry Smith   }
2041447629fSBarry Smith 
2051447629fSBarry Smith   /* wait on 2nd sends */
20648a46eb9SPierre Jolivet   if (nreceives) PetscCallMPI(MPI_Waitall(nreceives, send_waits2, send_status2));
2071447629fSBarry Smith 
2081447629fSBarry Smith   /* 2nd recvs: for the answer of my request */
2091447629fSBarry Smith   for (j = 0; j < nsends; j++) {
2109566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Waitany(nsends, recv_waits2, &widx, &recv_status));
2119566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, &nindices));
2121447629fSBarry Smith     source = recv_status.MPI_SOURCE;
2131447629fSBarry Smith     /* pack output ia[] */
2141447629fSBarry Smith     rbuf  = sindices2 + start[source];
2151447629fSBarry Smith     count = 0;
2161447629fSBarry Smith     for (i = 0; i < n; i++) {
2171447629fSBarry Smith       if (source == owner[i]) ia[i] = rbuf[count++];
2181447629fSBarry Smith     }
2191447629fSBarry Smith   }
2201447629fSBarry Smith 
2211447629fSBarry Smith   /* free arrays */
2229566063dSJacob Faibussowitsch   PetscCall(PetscFree(start));
2239566063dSJacob Faibussowitsch   PetscCall(PetscFree2(sizes, owner));
2249566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rindices, recv_waits));
2259566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rindices2, recv_waits2));
2269566063dSJacob Faibussowitsch   PetscCall(PetscFree3(sindices, send_waits, send_status));
2279566063dSJacob Faibussowitsch   PetscCall(PetscFree3(sindices2, send_waits2, send_status2));
2283ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2291447629fSBarry Smith }
2301447629fSBarry Smith 
231da8c939bSJacob Faibussowitsch static PetscErrorCode AOPetscToApplication_MemoryScalable(AO ao, PetscInt n, PetscInt *ia)
232d71ae5a4SJacob Faibussowitsch {
2331447629fSBarry Smith   AO_MemoryScalable *aomems  = (AO_MemoryScalable *)ao->data;
2341447629fSBarry Smith   PetscInt          *app_loc = aomems->app_loc;
2351447629fSBarry Smith 
2361447629fSBarry Smith   PetscFunctionBegin;
2379566063dSJacob Faibussowitsch   PetscCall(AOMap_MemoryScalable_private(ao, n, ia, app_loc));
2383ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2391447629fSBarry Smith }
2401447629fSBarry Smith 
241da8c939bSJacob Faibussowitsch static PetscErrorCode AOApplicationToPetsc_MemoryScalable(AO ao, PetscInt n, PetscInt *ia)
242d71ae5a4SJacob Faibussowitsch {
2431447629fSBarry Smith   AO_MemoryScalable *aomems    = (AO_MemoryScalable *)ao->data;
2441447629fSBarry Smith   PetscInt          *petsc_loc = aomems->petsc_loc;
2451447629fSBarry Smith 
2461447629fSBarry Smith   PetscFunctionBegin;
2479566063dSJacob Faibussowitsch   PetscCall(AOMap_MemoryScalable_private(ao, n, ia, petsc_loc));
2483ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2491447629fSBarry Smith }
2501447629fSBarry Smith 
251da8c939bSJacob Faibussowitsch static const struct _AOOps AOOps_MemoryScalable = {
252267267bdSJacob Faibussowitsch   PetscDesignatedInitializer(view, AOView_MemoryScalable),
253267267bdSJacob Faibussowitsch   PetscDesignatedInitializer(destroy, AODestroy_MemoryScalable),
254267267bdSJacob Faibussowitsch   PetscDesignatedInitializer(petsctoapplication, AOPetscToApplication_MemoryScalable),
255267267bdSJacob Faibussowitsch   PetscDesignatedInitializer(applicationtopetsc, AOApplicationToPetsc_MemoryScalable),
2561447629fSBarry Smith };
2571447629fSBarry Smith 
258da8c939bSJacob Faibussowitsch static PetscErrorCode AOCreateMemoryScalable_private(MPI_Comm comm, PetscInt napp, const PetscInt from_array[], const PetscInt to_array[], AO ao, PetscInt *aomap_loc)
259d71ae5a4SJacob Faibussowitsch {
2601447629fSBarry Smith   AO_MemoryScalable *aomems  = (AO_MemoryScalable *)ao->data;
2611447629fSBarry Smith   PetscLayout        map     = aomems->map;
2621447629fSBarry Smith   PetscInt           n_local = map->n, i, j;
2631447629fSBarry Smith   PetscMPIInt        rank, size, tag;
26476ec1555SBarry Smith   PetscInt          *owner, *start, *sizes, nsends, nreceives;
2651447629fSBarry Smith   PetscInt           nmax, count, *sindices, *rindices, idx, lastidx;
2661447629fSBarry Smith   PetscInt          *owners = aomems->map->range;
2671447629fSBarry Smith   MPI_Request       *send_waits, *recv_waits;
2681447629fSBarry Smith   MPI_Status         recv_status;
2691447629fSBarry Smith   PetscMPIInt        nindices, widx;
2701447629fSBarry Smith   PetscInt          *rbuf;
2711447629fSBarry Smith   PetscInt           n = napp, ip, ia;
2721447629fSBarry Smith   MPI_Status        *send_status;
2731447629fSBarry Smith 
2741447629fSBarry Smith   PetscFunctionBegin;
2759566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(aomap_loc, n_local));
2761447629fSBarry Smith 
2779566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
2789566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
2791447629fSBarry Smith 
2801447629fSBarry Smith   /*  first count number of contributors (of from_array[]) to each processor */
2819566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(2 * size, &sizes));
2829566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n, &owner));
2831447629fSBarry Smith 
2841447629fSBarry Smith   j       = 0;
2851447629fSBarry Smith   lastidx = -1;
2861447629fSBarry Smith   for (i = 0; i < n; i++) {
2871447629fSBarry Smith     /* if indices are NOT locally sorted, need to start search at the beginning */
2881447629fSBarry Smith     if (lastidx > (idx = from_array[i])) j = 0;
2891447629fSBarry Smith     lastidx = idx;
2901447629fSBarry Smith     for (; j < size; j++) {
2911447629fSBarry Smith       if (idx >= owners[j] && idx < owners[j + 1]) {
29276ec1555SBarry Smith         sizes[2 * j] += 2;    /* num of indices to be sent - in pairs (ip,ia) */
29376ec1555SBarry Smith         sizes[2 * j + 1] = 1; /* send to proc[j] */
2941447629fSBarry Smith         owner[i]         = j;
2951447629fSBarry Smith         break;
2961447629fSBarry Smith       }
2971447629fSBarry Smith     }
2981447629fSBarry Smith   }
29976ec1555SBarry Smith   sizes[2 * rank] = sizes[2 * rank + 1] = 0; /* do not receive from self! */
3001447629fSBarry Smith   nsends                                = 0;
30176ec1555SBarry Smith   for (i = 0; i < size; i++) nsends += sizes[2 * i + 1];
3021447629fSBarry Smith 
3031447629fSBarry Smith   /* inform other processors of number of messages and max length*/
3049566063dSJacob Faibussowitsch   PetscCall(PetscMaxSum(comm, sizes, &nmax, &nreceives));
3051447629fSBarry Smith 
3061447629fSBarry Smith   /* allocate arrays */
3079566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetNewTag((PetscObject)ao, &tag));
3089566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(nreceives * nmax, &rindices, nreceives, &recv_waits));
3099566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(2 * n, &sindices, nsends, &send_waits, nsends, &send_status));
3109566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(size, &start));
3111447629fSBarry Smith 
3121447629fSBarry Smith   /* post receives: */
31348a46eb9SPierre Jolivet   for (i = 0; i < nreceives; i++) PetscCallMPI(MPI_Irecv(rindices + nmax * i, nmax, MPIU_INT, MPI_ANY_SOURCE, tag, comm, recv_waits + i));
3141447629fSBarry Smith 
3151447629fSBarry Smith   /* do sends:
3161447629fSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3171447629fSBarry Smith          the ith processor
3181447629fSBarry Smith   */
3191447629fSBarry Smith   start[0] = 0;
32076ec1555SBarry Smith   for (i = 1; i < size; i++) start[i] = start[i - 1] + sizes[2 * i - 2];
3211447629fSBarry Smith   for (i = 0; i < n; i++) {
3221447629fSBarry Smith     j = owner[i];
3231447629fSBarry Smith     if (j != rank) {
3241447629fSBarry Smith       ip                   = from_array[i];
3251447629fSBarry Smith       ia                   = to_array[i];
3261447629fSBarry Smith       sindices[start[j]++] = ip;
3271447629fSBarry Smith       sindices[start[j]++] = ia;
3281447629fSBarry Smith     } else { /* compute my own map */
3291447629fSBarry Smith       ip            = from_array[i] - owners[rank];
3301447629fSBarry Smith       ia            = to_array[i];
3311447629fSBarry Smith       aomap_loc[ip] = ia;
3321447629fSBarry Smith     }
3331447629fSBarry Smith   }
3341447629fSBarry Smith 
3351447629fSBarry Smith   start[0] = 0;
33676ec1555SBarry Smith   for (i = 1; i < size; i++) start[i] = start[i - 1] + sizes[2 * i - 2];
3371447629fSBarry Smith   for (i = 0, count = 0; i < size; i++) {
33876ec1555SBarry Smith     if (sizes[2 * i + 1]) {
3399566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(sindices + start[i], sizes[2 * i], MPIU_INT, i, tag, comm, send_waits + count));
3401447629fSBarry Smith       count++;
3411447629fSBarry Smith     }
3421447629fSBarry Smith   }
34308401ef6SPierre Jolivet   PetscCheck(nsends == count, comm, PETSC_ERR_SUP, "nsends %" PetscInt_FMT " != count %" PetscInt_FMT, nsends, count);
3441447629fSBarry Smith 
3451447629fSBarry Smith   /* wait on sends */
34648a46eb9SPierre Jolivet   if (nsends) PetscCallMPI(MPI_Waitall(nsends, send_waits, send_status));
3471447629fSBarry Smith 
3481447629fSBarry Smith   /* recvs */
3491447629fSBarry Smith   count = 0;
3501447629fSBarry Smith   for (j = nreceives; j > 0; j--) {
3519566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Waitany(nreceives, recv_waits, &widx, &recv_status));
3529566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, &nindices));
3531447629fSBarry Smith     rbuf = rindices + nmax * widx; /* global index */
3541447629fSBarry Smith 
3551447629fSBarry Smith     /* compute local mapping */
3561447629fSBarry Smith     for (i = 0; i < nindices; i += 2) {       /* pack aomap_loc */
3571447629fSBarry Smith       ip            = rbuf[i] - owners[rank]; /* local index */
3581447629fSBarry Smith       ia            = rbuf[i + 1];
3591447629fSBarry Smith       aomap_loc[ip] = ia;
3601447629fSBarry Smith     }
3611447629fSBarry Smith     count++;
3621447629fSBarry Smith   }
3631447629fSBarry Smith 
3649566063dSJacob Faibussowitsch   PetscCall(PetscFree(start));
3659566063dSJacob Faibussowitsch   PetscCall(PetscFree3(sindices, send_waits, send_status));
3669566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rindices, recv_waits));
3679566063dSJacob Faibussowitsch   PetscCall(PetscFree(owner));
3689566063dSJacob Faibussowitsch   PetscCall(PetscFree(sizes));
3693ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3701447629fSBarry Smith }
3711447629fSBarry Smith 
372da8c939bSJacob Faibussowitsch PETSC_INTERN PetscErrorCode AOCreate_MemoryScalable(AO ao)
373d71ae5a4SJacob Faibussowitsch {
3741447629fSBarry Smith   IS                 isapp = ao->isapp, ispetsc = ao->ispetsc;
3751447629fSBarry Smith   const PetscInt    *mypetsc, *myapp;
3761447629fSBarry Smith   PetscInt           napp, n_local, N, i, start, *petsc, *lens, *disp;
3771447629fSBarry Smith   MPI_Comm           comm;
3781447629fSBarry Smith   AO_MemoryScalable *aomems;
3791447629fSBarry Smith   PetscLayout        map;
3801447629fSBarry Smith   PetscMPIInt        size, rank;
3811447629fSBarry Smith 
3821447629fSBarry Smith   PetscFunctionBegin;
38328b400f6SJacob Faibussowitsch   PetscCheck(isapp, PetscObjectComm((PetscObject)ao), PETSC_ERR_ARG_WRONGSTATE, "AOSetIS() must be called before AOSetType()");
3841447629fSBarry Smith   /* create special struct aomems */
3854dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&aomems));
3861447629fSBarry Smith   ao->data   = (void *)aomems;
387aea10558SJacob Faibussowitsch   ao->ops[0] = AOOps_MemoryScalable;
3889566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)ao, AOMEMORYSCALABLE));
3891447629fSBarry Smith 
3901447629fSBarry Smith   /* transmit all local lengths of isapp to all processors */
3919566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)isapp, &comm));
3929566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
3939566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3949566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(size, &lens, size, &disp));
3959566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(isapp, &napp));
3969566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Allgather(&napp, 1, MPIU_INT, lens, 1, MPIU_INT, comm));
3971447629fSBarry Smith 
3981447629fSBarry Smith   N = 0;
3991447629fSBarry Smith   for (i = 0; i < size; i++) {
4001447629fSBarry Smith     disp[i] = N;
4011447629fSBarry Smith     N += lens[i];
4021447629fSBarry Smith   }
4031447629fSBarry Smith 
4041447629fSBarry Smith   /* If ispetsc is 0 then use "natural" numbering */
4051447629fSBarry Smith   if (napp) {
4061447629fSBarry Smith     if (!ispetsc) {
4071447629fSBarry Smith       start = disp[rank];
4089566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(napp + 1, &petsc));
4091447629fSBarry Smith       for (i = 0; i < napp; i++) petsc[i] = start + i;
4101447629fSBarry Smith     } else {
4119566063dSJacob Faibussowitsch       PetscCall(ISGetIndices(ispetsc, &mypetsc));
4121447629fSBarry Smith       petsc = (PetscInt *)mypetsc;
4131447629fSBarry Smith     }
4144a2f8832SBarry Smith   } else {
4154a2f8832SBarry Smith     petsc = NULL;
4161447629fSBarry Smith   }
4171447629fSBarry Smith 
4181447629fSBarry Smith   /* create a map with global size N - used to determine the local sizes of ao - shall we use local napp instead of N? */
4199566063dSJacob Faibussowitsch   PetscCall(PetscLayoutCreate(comm, &map));
4201447629fSBarry Smith   map->bs = 1;
4211447629fSBarry Smith   map->N  = N;
4229566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(map));
4231447629fSBarry Smith 
4241447629fSBarry Smith   ao->N       = N;
4251447629fSBarry Smith   ao->n       = map->n;
4261447629fSBarry Smith   aomems->map = map;
4271447629fSBarry Smith 
4281447629fSBarry Smith   /* create distributed indices app_loc: petsc->app and petsc_loc: app->petsc */
4291447629fSBarry Smith   n_local = map->n;
4309566063dSJacob Faibussowitsch   PetscCall(PetscCalloc2(n_local, &aomems->app_loc, n_local, &aomems->petsc_loc));
4319566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isapp, &myapp));
4321447629fSBarry Smith 
4339566063dSJacob Faibussowitsch   PetscCall(AOCreateMemoryScalable_private(comm, napp, petsc, myapp, ao, aomems->app_loc));
4349566063dSJacob Faibussowitsch   PetscCall(AOCreateMemoryScalable_private(comm, napp, myapp, petsc, ao, aomems->petsc_loc));
4351447629fSBarry Smith 
4369566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isapp, &myapp));
4371447629fSBarry Smith   if (napp) {
4381447629fSBarry Smith     if (ispetsc) {
4399566063dSJacob Faibussowitsch       PetscCall(ISRestoreIndices(ispetsc, &mypetsc));
4401447629fSBarry Smith     } else {
4419566063dSJacob Faibussowitsch       PetscCall(PetscFree(petsc));
4421447629fSBarry Smith     }
4431447629fSBarry Smith   }
4449566063dSJacob Faibussowitsch   PetscCall(PetscFree2(lens, disp));
4453ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4461447629fSBarry Smith }
4471447629fSBarry Smith 
4481447629fSBarry Smith /*@C
4491447629fSBarry Smith   AOCreateMemoryScalable - Creates a memory scalable application ordering using two integer arrays.
4501447629fSBarry Smith 
451d083f849SBarry Smith   Collective
4521447629fSBarry Smith 
4531447629fSBarry Smith   Input Parameters:
454cab54364SBarry Smith + comm    - MPI communicator that is to share the `AO`
4551447629fSBarry Smith . napp    - size of integer arrays
4561447629fSBarry Smith . myapp   - integer array that defines an ordering
457*b6971eaeSBarry Smith - mypetsc - integer array that defines another ordering (may be `NULL` to indicate the natural ordering, that is 0,1,2,3,...)
4581447629fSBarry Smith 
4591447629fSBarry Smith   Output Parameter:
4601447629fSBarry Smith . aoout - the new application ordering
4611447629fSBarry Smith 
4621447629fSBarry Smith   Level: beginner
4631447629fSBarry Smith 
464cab54364SBarry Smith   Note:
465*b6971eaeSBarry Smith   The arrays `myapp` and `mypetsc` must contain the all the integers 0 to `napp`-1 with no duplicates; that is there cannot be any "holes"
466cab54364SBarry Smith   in the indices. Use `AOCreateMapping()` or `AOCreateMappingIS()` if you wish to have "holes" in the indices.
467cab54364SBarry Smith   Comparing with `AOCreateBasic()`, this routine trades memory with message communication.
4681447629fSBarry Smith 
469cab54364SBarry Smith .seealso: [](sec_ao), [](sec_scatter), `AO`, `AOCreateMemoryScalableIS()`, `AODestroy()`, `AOPetscToApplication()`, `AOApplicationToPetsc()`
4701447629fSBarry Smith @*/
471d71ae5a4SJacob Faibussowitsch PetscErrorCode AOCreateMemoryScalable(MPI_Comm comm, PetscInt napp, const PetscInt myapp[], const PetscInt mypetsc[], AO *aoout)
472d71ae5a4SJacob Faibussowitsch {
4731447629fSBarry Smith   IS              isapp, ispetsc;
4741447629fSBarry Smith   const PetscInt *app = myapp, *petsc = mypetsc;
4751447629fSBarry Smith 
4761447629fSBarry Smith   PetscFunctionBegin;
4779566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(comm, napp, app, PETSC_USE_POINTER, &isapp));
4781447629fSBarry Smith   if (mypetsc) {
4799566063dSJacob Faibussowitsch     PetscCall(ISCreateGeneral(comm, napp, petsc, PETSC_USE_POINTER, &ispetsc));
4801447629fSBarry Smith   } else {
4811447629fSBarry Smith     ispetsc = NULL;
4821447629fSBarry Smith   }
4839566063dSJacob Faibussowitsch   PetscCall(AOCreateMemoryScalableIS(isapp, ispetsc, aoout));
4849566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&isapp));
48548a46eb9SPierre Jolivet   if (mypetsc) PetscCall(ISDestroy(&ispetsc));
4863ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4871447629fSBarry Smith }
4881447629fSBarry Smith 
4891447629fSBarry Smith /*@C
4901447629fSBarry Smith   AOCreateMemoryScalableIS - Creates a memory scalable application ordering using two index sets.
4911447629fSBarry Smith 
492c3339decSBarry Smith   Collective
4931447629fSBarry Smith 
4941447629fSBarry Smith   Input Parameters:
4951447629fSBarry Smith + isapp   - index set that defines an ordering
496*b6971eaeSBarry Smith - ispetsc - index set that defines another ordering (may be `NULL` to use the natural ordering)
4971447629fSBarry Smith 
4981447629fSBarry Smith   Output Parameter:
4991447629fSBarry Smith . aoout - the new application ordering
5001447629fSBarry Smith 
5011447629fSBarry Smith   Level: beginner
5021447629fSBarry Smith 
50395452b02SPatrick Sanan   Notes:
504*b6971eaeSBarry Smith   The index sets `isapp` and `ispetsc` must contain the all the integers 0 to napp-1 (where napp is the length of the index sets) with no duplicates;
5051447629fSBarry Smith   that is there cannot be any "holes".
506cab54364SBarry Smith 
507cab54364SBarry Smith   Comparing with `AOCreateBasicIS()`, this routine trades memory with message communication.
508cab54364SBarry Smith 
509cab54364SBarry Smith .seealso: [](sec_ao), [](sec_scatter), `AO`, `AOCreateBasicIS()`, `AOCreateMemoryScalable()`, `AODestroy()`
5101447629fSBarry Smith @*/
511d71ae5a4SJacob Faibussowitsch PetscErrorCode AOCreateMemoryScalableIS(IS isapp, IS ispetsc, AO *aoout)
512d71ae5a4SJacob Faibussowitsch {
5131447629fSBarry Smith   MPI_Comm comm;
5141447629fSBarry Smith   AO       ao;
5151447629fSBarry Smith 
5161447629fSBarry Smith   PetscFunctionBegin;
5179566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)isapp, &comm));
5189566063dSJacob Faibussowitsch   PetscCall(AOCreate(comm, &ao));
5199566063dSJacob Faibussowitsch   PetscCall(AOSetIS(ao, isapp, ispetsc));
5209566063dSJacob Faibussowitsch   PetscCall(AOSetType(ao, AOMEMORYSCALABLE));
5219566063dSJacob Faibussowitsch   PetscCall(AOViewFromOptions(ao, NULL, "-ao_view"));
5221447629fSBarry Smith   *aoout = ao;
5233ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
5241447629fSBarry Smith }
525