xref: /petsc/src/vec/is/ao/impls/memscalable/aomemscalable.c (revision 4dfa11a44d5adf2389f1d3acbc8f3c1116dc6c3a)
11447629fSBarry Smith 
21447629fSBarry Smith /*
31447629fSBarry Smith     The memory scalable AO application ordering routines. These store the
46bd6ae52SBarry Smith   orderings on each processor for that processor's range of values
51447629fSBarry Smith */
61447629fSBarry Smith 
71447629fSBarry Smith #include <../src/vec/is/ao/aoimpl.h> /*I  "petscao.h"   I*/
81447629fSBarry Smith 
91447629fSBarry Smith typedef struct {
101447629fSBarry Smith   PetscInt   *app_loc;   /* app_loc[i] is the partner for the ith local PETSc slot */
111447629fSBarry Smith   PetscInt   *petsc_loc; /* petsc_loc[j] is the partner for the jth local app slot */
121447629fSBarry Smith   PetscLayout map;       /* determines the local sizes of ao */
131447629fSBarry Smith } AO_MemoryScalable;
141447629fSBarry Smith 
151447629fSBarry Smith /*
166bd6ae52SBarry Smith        All processors ship the data to process 0 to be printed; note that this is not scalable because
176bd6ae52SBarry Smith        process 0 allocates space for all the orderings entry across all the processes
181447629fSBarry Smith */
199371c9d4SSatish Balay PetscErrorCode AOView_MemoryScalable(AO ao, PetscViewer viewer) {
201447629fSBarry Smith   PetscMPIInt        rank, size;
211447629fSBarry Smith   AO_MemoryScalable *aomems = (AO_MemoryScalable *)ao->data;
221447629fSBarry Smith   PetscBool          iascii;
231447629fSBarry Smith   PetscMPIInt        tag_app, tag_petsc;
241447629fSBarry Smith   PetscLayout        map = aomems->map;
251447629fSBarry Smith   PetscInt          *app, *app_loc, *petsc, *petsc_loc, len, i, j;
261447629fSBarry Smith   MPI_Status         status;
271447629fSBarry Smith 
281447629fSBarry Smith   PetscFunctionBegin;
299566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
3028b400f6SJacob Faibussowitsch   PetscCheck(iascii, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not supported for AO MemoryScalable", ((PetscObject)viewer)->type_name);
311447629fSBarry Smith 
329566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)ao), &rank));
339566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)ao), &size));
341447629fSBarry Smith 
359566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetNewTag((PetscObject)ao, &tag_app));
369566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetNewTag((PetscObject)ao, &tag_petsc));
371447629fSBarry Smith 
38dd400576SPatrick Sanan   if (rank == 0) {
399566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "Number of elements in ordering %" PetscInt_FMT "\n", ao->N));
409566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "PETSc->App  App->PETSc\n"));
411447629fSBarry Smith 
429566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(map->N, &app, map->N, &petsc));
431447629fSBarry Smith     len = map->n;
441447629fSBarry Smith     /* print local AO */
459566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "Process [%d]\n", rank));
4648a46eb9SPierre Jolivet     for (i = 0; i < len; i++) PetscCall(PetscViewerASCIIPrintf(viewer, "%3" PetscInt_FMT "  %3" PetscInt_FMT "    %3" PetscInt_FMT "  %3" PetscInt_FMT "\n", i, aomems->app_loc[i], i, aomems->petsc_loc[i]));
471447629fSBarry Smith 
481447629fSBarry Smith     /* recv and print off-processor's AO */
491447629fSBarry Smith     for (i = 1; i < size; i++) {
501447629fSBarry Smith       len       = map->range[i + 1] - map->range[i];
511447629fSBarry Smith       app_loc   = app + map->range[i];
521447629fSBarry Smith       petsc_loc = petsc + map->range[i];
539566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Recv(app_loc, (PetscMPIInt)len, MPIU_INT, i, tag_app, PetscObjectComm((PetscObject)ao), &status));
549566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Recv(petsc_loc, (PetscMPIInt)len, MPIU_INT, i, tag_petsc, PetscObjectComm((PetscObject)ao), &status));
559566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "Process [%" PetscInt_FMT "]\n", i));
5648a46eb9SPierre Jolivet       for (j = 0; j < len; j++) PetscCall(PetscViewerASCIIPrintf(viewer, "%3" PetscInt_FMT "  %3" PetscInt_FMT "    %3" PetscInt_FMT "  %3" PetscInt_FMT "\n", map->range[i] + j, app_loc[j], map->range[i] + j, petsc_loc[j]));
571447629fSBarry Smith     }
589566063dSJacob Faibussowitsch     PetscCall(PetscFree2(app, petsc));
591447629fSBarry Smith 
601447629fSBarry Smith   } else {
611447629fSBarry Smith     /* send values */
629566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Send((void *)aomems->app_loc, map->n, MPIU_INT, 0, tag_app, PetscObjectComm((PetscObject)ao)));
639566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Send((void *)aomems->petsc_loc, map->n, MPIU_INT, 0, tag_petsc, PetscObjectComm((PetscObject)ao)));
641447629fSBarry Smith   }
659566063dSJacob Faibussowitsch   PetscCall(PetscViewerFlush(viewer));
661447629fSBarry Smith   PetscFunctionReturn(0);
671447629fSBarry Smith }
681447629fSBarry Smith 
699371c9d4SSatish Balay PetscErrorCode AODestroy_MemoryScalable(AO ao) {
701447629fSBarry Smith   AO_MemoryScalable *aomems = (AO_MemoryScalable *)ao->data;
711447629fSBarry Smith 
721447629fSBarry Smith   PetscFunctionBegin;
739566063dSJacob Faibussowitsch   PetscCall(PetscFree2(aomems->app_loc, aomems->petsc_loc));
749566063dSJacob Faibussowitsch   PetscCall(PetscLayoutDestroy(&aomems->map));
759566063dSJacob Faibussowitsch   PetscCall(PetscFree(aomems));
761447629fSBarry Smith   PetscFunctionReturn(0);
771447629fSBarry Smith }
781447629fSBarry Smith 
791447629fSBarry Smith /*
801447629fSBarry Smith    Input Parameters:
811447629fSBarry Smith +   ao - the application ordering context
821447629fSBarry Smith .   n  - the number of integers in ia[]
831447629fSBarry Smith .   ia - the integers; these are replaced with their mapped value
841447629fSBarry Smith -   maploc - app_loc or petsc_loc in struct "AO_MemoryScalable"
851447629fSBarry Smith 
861447629fSBarry Smith    Output Parameter:
871447629fSBarry Smith .   ia - the mapped interges
881447629fSBarry Smith  */
899371c9d4SSatish Balay PetscErrorCode AOMap_MemoryScalable_private(AO ao, PetscInt n, PetscInt *ia, const PetscInt *maploc) {
901447629fSBarry Smith   AO_MemoryScalable *aomems = (AO_MemoryScalable *)ao->data;
911447629fSBarry Smith   MPI_Comm           comm;
921447629fSBarry Smith   PetscMPIInt        rank, size, tag1, tag2;
9376ec1555SBarry Smith   PetscInt          *owner, *start, *sizes, nsends, nreceives;
941447629fSBarry Smith   PetscInt           nmax, count, *sindices, *rindices, i, j, idx, lastidx, *sindices2, *rindices2;
956bd6ae52SBarry Smith   const PetscInt    *owners = aomems->map->range;
961447629fSBarry Smith   MPI_Request       *send_waits, *recv_waits, *send_waits2, *recv_waits2;
971447629fSBarry Smith   MPI_Status         recv_status;
981447629fSBarry Smith   PetscMPIInt        nindices, source, widx;
991447629fSBarry Smith   PetscInt          *rbuf, *sbuf;
1001447629fSBarry Smith   MPI_Status        *send_status, *send_status2;
1011447629fSBarry Smith 
1021447629fSBarry Smith   PetscFunctionBegin;
1039566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)ao, &comm));
1049566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1059566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
1061447629fSBarry Smith 
1071447629fSBarry Smith   /*  first count number of contributors to each processor */
1089566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(size, &start));
1099566063dSJacob Faibussowitsch   PetscCall(PetscCalloc2(2 * size, &sizes, n, &owner));
1101447629fSBarry Smith 
1111447629fSBarry Smith   j       = 0;
1121447629fSBarry Smith   lastidx = -1;
1131447629fSBarry Smith   for (i = 0; i < n; i++) {
1146bd6ae52SBarry Smith     if (ia[i] < 0) owner[i] = -1;      /* mark negative entries (which are not to be mapped) with a special negative value */
1156bd6ae52SBarry Smith     if (ia[i] >= ao->N) owner[i] = -2; /* mark out of range entries with special negative value */
1166bd6ae52SBarry Smith     else {
1171447629fSBarry Smith       /* if indices are NOT locally sorted, need to start search at the beginning */
1181447629fSBarry Smith       if (lastidx > (idx = ia[i])) j = 0;
1191447629fSBarry Smith       lastidx = idx;
1201447629fSBarry Smith       for (; j < size; j++) {
1211447629fSBarry Smith         if (idx >= owners[j] && idx < owners[j + 1]) {
12276ec1555SBarry Smith           sizes[2 * j]++;       /* num of indices to be sent */
12376ec1555SBarry Smith           sizes[2 * j + 1] = 1; /* send to proc[j] */
1241447629fSBarry Smith           owner[i]         = j;
1251447629fSBarry Smith           break;
1261447629fSBarry Smith         }
1271447629fSBarry Smith       }
1281447629fSBarry Smith     }
1296bd6ae52SBarry Smith   }
13076ec1555SBarry Smith   sizes[2 * rank] = sizes[2 * rank + 1] = 0; /* do not receive from self! */
1311447629fSBarry Smith   nsends                                = 0;
13276ec1555SBarry Smith   for (i = 0; i < size; i++) nsends += sizes[2 * i + 1];
1331447629fSBarry Smith 
1341447629fSBarry Smith   /* inform other processors of number of messages and max length*/
1359566063dSJacob Faibussowitsch   PetscCall(PetscMaxSum(comm, sizes, &nmax, &nreceives));
1361447629fSBarry Smith 
1371447629fSBarry Smith   /* allocate arrays */
1389566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetNewTag((PetscObject)ao, &tag1));
1399566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetNewTag((PetscObject)ao, &tag2));
1401447629fSBarry Smith 
1419566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(nreceives * nmax, &rindices, nreceives, &recv_waits));
1429566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(nsends * nmax, &rindices2, nsends, &recv_waits2));
1431447629fSBarry Smith 
1449566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(n, &sindices, nsends, &send_waits, nsends, &send_status));
1459566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(n, &sindices2, nreceives, &send_waits2, nreceives, &send_status2));
1461447629fSBarry Smith 
1471447629fSBarry Smith   /* post 1st receives: receive others requests
1481447629fSBarry Smith      since we don't know how long each individual message is we
1491447629fSBarry Smith      allocate the largest needed buffer for each receive. Potentially
1501447629fSBarry Smith      this is a lot of wasted space.
1511447629fSBarry Smith   */
15248a46eb9SPierre Jolivet   for (i = 0, count = 0; i < nreceives; i++) PetscCallMPI(MPI_Irecv(rindices + nmax * i, nmax, MPIU_INT, MPI_ANY_SOURCE, tag1, comm, recv_waits + count++));
1531447629fSBarry Smith 
1541447629fSBarry Smith   /* do 1st sends:
1551447629fSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
1561447629fSBarry Smith          the ith processor
1571447629fSBarry Smith   */
1581447629fSBarry Smith   start[0] = 0;
15976ec1555SBarry Smith   for (i = 1; i < size; i++) start[i] = start[i - 1] + sizes[2 * i - 2];
1601447629fSBarry Smith   for (i = 0; i < n; i++) {
1611447629fSBarry Smith     j = owner[i];
1626bd6ae52SBarry Smith     if (j == -1) continue; /* do not remap negative entries in ia[] */
1639371c9d4SSatish Balay     else if (j == -2) { /* out of range entries get mapped to -1 */ ia[i] = -1;
1646bd6ae52SBarry Smith       continue;
1656bd6ae52SBarry Smith     } else if (j != rank) {
1661447629fSBarry Smith       sindices[start[j]++] = ia[i];
1671447629fSBarry Smith     } else { /* compute my own map */
1681447629fSBarry Smith       ia[i] = maploc[ia[i] - owners[rank]];
1691447629fSBarry Smith     }
1701447629fSBarry Smith   }
1711447629fSBarry Smith 
1721447629fSBarry Smith   start[0] = 0;
17376ec1555SBarry Smith   for (i = 1; i < size; i++) start[i] = start[i - 1] + sizes[2 * i - 2];
1741447629fSBarry Smith   for (i = 0, count = 0; i < size; i++) {
17576ec1555SBarry Smith     if (sizes[2 * i + 1]) {
1761447629fSBarry Smith       /* send my request to others */
1779566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(sindices + start[i], sizes[2 * i], MPIU_INT, i, tag1, comm, send_waits + count));
1781447629fSBarry Smith       /* post receive for the answer of my request */
1799566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Irecv(sindices2 + start[i], sizes[2 * i], MPIU_INT, i, tag2, comm, recv_waits2 + count));
1801447629fSBarry Smith       count++;
1811447629fSBarry Smith     }
1821447629fSBarry Smith   }
18308401ef6SPierre Jolivet   PetscCheck(nsends == count, comm, PETSC_ERR_SUP, "nsends %" PetscInt_FMT " != count %" PetscInt_FMT, nsends, count);
1841447629fSBarry Smith 
1851447629fSBarry Smith   /* wait on 1st sends */
18648a46eb9SPierre Jolivet   if (nsends) PetscCallMPI(MPI_Waitall(nsends, send_waits, send_status));
1871447629fSBarry Smith 
1881447629fSBarry Smith   /* 1st recvs: other's requests */
1891447629fSBarry Smith   for (j = 0; j < nreceives; j++) {
1909566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Waitany(nreceives, recv_waits, &widx, &recv_status)); /* idx: index of handle for operation that completed */
1919566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, &nindices));
1921447629fSBarry Smith     rbuf   = rindices + nmax * widx; /* global index */
1931447629fSBarry Smith     source = recv_status.MPI_SOURCE;
1941447629fSBarry Smith 
1951447629fSBarry Smith     /* compute mapping */
1961447629fSBarry Smith     sbuf = rbuf;
1971447629fSBarry Smith     for (i = 0; i < nindices; i++) sbuf[i] = maploc[rbuf[i] - owners[rank]];
1981447629fSBarry Smith 
1991447629fSBarry Smith     /* send mapping back to the sender */
2009566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Isend(sbuf, nindices, MPIU_INT, source, tag2, comm, send_waits2 + widx));
2011447629fSBarry Smith   }
2021447629fSBarry Smith 
2031447629fSBarry Smith   /* wait on 2nd sends */
20448a46eb9SPierre Jolivet   if (nreceives) PetscCallMPI(MPI_Waitall(nreceives, send_waits2, send_status2));
2051447629fSBarry Smith 
2061447629fSBarry Smith   /* 2nd recvs: for the answer of my request */
2071447629fSBarry Smith   for (j = 0; j < nsends; j++) {
2089566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Waitany(nsends, recv_waits2, &widx, &recv_status));
2099566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, &nindices));
2101447629fSBarry Smith     source = recv_status.MPI_SOURCE;
2111447629fSBarry Smith     /* pack output ia[] */
2121447629fSBarry Smith     rbuf   = sindices2 + start[source];
2131447629fSBarry Smith     count  = 0;
2141447629fSBarry Smith     for (i = 0; i < n; i++) {
2151447629fSBarry Smith       if (source == owner[i]) ia[i] = rbuf[count++];
2161447629fSBarry Smith     }
2171447629fSBarry Smith   }
2181447629fSBarry Smith 
2191447629fSBarry Smith   /* free arrays */
2209566063dSJacob Faibussowitsch   PetscCall(PetscFree(start));
2219566063dSJacob Faibussowitsch   PetscCall(PetscFree2(sizes, owner));
2229566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rindices, recv_waits));
2239566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rindices2, recv_waits2));
2249566063dSJacob Faibussowitsch   PetscCall(PetscFree3(sindices, send_waits, send_status));
2259566063dSJacob Faibussowitsch   PetscCall(PetscFree3(sindices2, send_waits2, send_status2));
2261447629fSBarry Smith   PetscFunctionReturn(0);
2271447629fSBarry Smith }
2281447629fSBarry Smith 
2299371c9d4SSatish Balay PetscErrorCode AOPetscToApplication_MemoryScalable(AO ao, PetscInt n, PetscInt *ia) {
2301447629fSBarry Smith   AO_MemoryScalable *aomems  = (AO_MemoryScalable *)ao->data;
2311447629fSBarry Smith   PetscInt          *app_loc = aomems->app_loc;
2321447629fSBarry Smith 
2331447629fSBarry Smith   PetscFunctionBegin;
2349566063dSJacob Faibussowitsch   PetscCall(AOMap_MemoryScalable_private(ao, n, ia, app_loc));
2351447629fSBarry Smith   PetscFunctionReturn(0);
2361447629fSBarry Smith }
2371447629fSBarry Smith 
2389371c9d4SSatish Balay PetscErrorCode AOApplicationToPetsc_MemoryScalable(AO ao, PetscInt n, PetscInt *ia) {
2391447629fSBarry Smith   AO_MemoryScalable *aomems    = (AO_MemoryScalable *)ao->data;
2401447629fSBarry Smith   PetscInt          *petsc_loc = aomems->petsc_loc;
2411447629fSBarry Smith 
2421447629fSBarry Smith   PetscFunctionBegin;
2439566063dSJacob Faibussowitsch   PetscCall(AOMap_MemoryScalable_private(ao, n, ia, petsc_loc));
2441447629fSBarry Smith   PetscFunctionReturn(0);
2451447629fSBarry Smith }
2461447629fSBarry Smith 
2471447629fSBarry Smith static struct _AOOps AOOps_MemoryScalable = {
248267267bdSJacob Faibussowitsch   PetscDesignatedInitializer(view, AOView_MemoryScalable),
249267267bdSJacob Faibussowitsch   PetscDesignatedInitializer(destroy, AODestroy_MemoryScalable),
250267267bdSJacob Faibussowitsch   PetscDesignatedInitializer(petsctoapplication, AOPetscToApplication_MemoryScalable),
251267267bdSJacob Faibussowitsch   PetscDesignatedInitializer(applicationtopetsc, AOApplicationToPetsc_MemoryScalable),
2521447629fSBarry Smith };
2531447629fSBarry Smith 
2549371c9d4SSatish Balay PetscErrorCode AOCreateMemoryScalable_private(MPI_Comm comm, PetscInt napp, const PetscInt from_array[], const PetscInt to_array[], AO ao, PetscInt *aomap_loc) {
2551447629fSBarry Smith   AO_MemoryScalable *aomems  = (AO_MemoryScalable *)ao->data;
2561447629fSBarry Smith   PetscLayout        map     = aomems->map;
2571447629fSBarry Smith   PetscInt           n_local = map->n, i, j;
2581447629fSBarry Smith   PetscMPIInt        rank, size, tag;
25976ec1555SBarry Smith   PetscInt          *owner, *start, *sizes, nsends, nreceives;
2601447629fSBarry Smith   PetscInt           nmax, count, *sindices, *rindices, idx, lastidx;
2611447629fSBarry Smith   PetscInt          *owners = aomems->map->range;
2621447629fSBarry Smith   MPI_Request       *send_waits, *recv_waits;
2631447629fSBarry Smith   MPI_Status         recv_status;
2641447629fSBarry Smith   PetscMPIInt        nindices, widx;
2651447629fSBarry Smith   PetscInt          *rbuf;
2661447629fSBarry Smith   PetscInt           n = napp, ip, ia;
2671447629fSBarry Smith   MPI_Status        *send_status;
2681447629fSBarry Smith 
2691447629fSBarry Smith   PetscFunctionBegin;
2709566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(aomap_loc, n_local));
2711447629fSBarry Smith 
2729566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
2739566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
2741447629fSBarry Smith 
2751447629fSBarry Smith   /*  first count number of contributors (of from_array[]) to each processor */
2769566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(2 * size, &sizes));
2779566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n, &owner));
2781447629fSBarry Smith 
2791447629fSBarry Smith   j       = 0;
2801447629fSBarry Smith   lastidx = -1;
2811447629fSBarry Smith   for (i = 0; i < n; i++) {
2821447629fSBarry Smith     /* if indices are NOT locally sorted, need to start search at the beginning */
2831447629fSBarry Smith     if (lastidx > (idx = from_array[i])) j = 0;
2841447629fSBarry Smith     lastidx = idx;
2851447629fSBarry Smith     for (; j < size; j++) {
2861447629fSBarry Smith       if (idx >= owners[j] && idx < owners[j + 1]) {
28776ec1555SBarry Smith         sizes[2 * j] += 2;    /* num of indices to be sent - in pairs (ip,ia) */
28876ec1555SBarry Smith         sizes[2 * j + 1] = 1; /* send to proc[j] */
2891447629fSBarry Smith         owner[i]         = j;
2901447629fSBarry Smith         break;
2911447629fSBarry Smith       }
2921447629fSBarry Smith     }
2931447629fSBarry Smith   }
29476ec1555SBarry Smith   sizes[2 * rank] = sizes[2 * rank + 1] = 0; /* do not receive from self! */
2951447629fSBarry Smith   nsends                                = 0;
29676ec1555SBarry Smith   for (i = 0; i < size; i++) nsends += sizes[2 * i + 1];
2971447629fSBarry Smith 
2981447629fSBarry Smith   /* inform other processors of number of messages and max length*/
2999566063dSJacob Faibussowitsch   PetscCall(PetscMaxSum(comm, sizes, &nmax, &nreceives));
3001447629fSBarry Smith 
3011447629fSBarry Smith   /* allocate arrays */
3029566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetNewTag((PetscObject)ao, &tag));
3039566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(nreceives * nmax, &rindices, nreceives, &recv_waits));
3049566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(2 * n, &sindices, nsends, &send_waits, nsends, &send_status));
3059566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(size, &start));
3061447629fSBarry Smith 
3071447629fSBarry Smith   /* post receives: */
30848a46eb9SPierre Jolivet   for (i = 0; i < nreceives; i++) PetscCallMPI(MPI_Irecv(rindices + nmax * i, nmax, MPIU_INT, MPI_ANY_SOURCE, tag, comm, recv_waits + i));
3091447629fSBarry Smith 
3101447629fSBarry Smith   /* do sends:
3111447629fSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3121447629fSBarry Smith          the ith processor
3131447629fSBarry Smith   */
3141447629fSBarry Smith   start[0] = 0;
31576ec1555SBarry Smith   for (i = 1; i < size; i++) start[i] = start[i - 1] + sizes[2 * i - 2];
3161447629fSBarry Smith   for (i = 0; i < n; i++) {
3171447629fSBarry Smith     j = owner[i];
3181447629fSBarry Smith     if (j != rank) {
3191447629fSBarry Smith       ip                   = from_array[i];
3201447629fSBarry Smith       ia                   = to_array[i];
3211447629fSBarry Smith       sindices[start[j]++] = ip;
3221447629fSBarry Smith       sindices[start[j]++] = ia;
3231447629fSBarry Smith     } else { /* compute my own map */
3241447629fSBarry Smith       ip            = from_array[i] - owners[rank];
3251447629fSBarry Smith       ia            = to_array[i];
3261447629fSBarry Smith       aomap_loc[ip] = ia;
3271447629fSBarry Smith     }
3281447629fSBarry Smith   }
3291447629fSBarry Smith 
3301447629fSBarry Smith   start[0] = 0;
33176ec1555SBarry Smith   for (i = 1; i < size; i++) start[i] = start[i - 1] + sizes[2 * i - 2];
3321447629fSBarry Smith   for (i = 0, count = 0; i < size; i++) {
33376ec1555SBarry Smith     if (sizes[2 * i + 1]) {
3349566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Isend(sindices + start[i], sizes[2 * i], MPIU_INT, i, tag, comm, send_waits + count));
3351447629fSBarry Smith       count++;
3361447629fSBarry Smith     }
3371447629fSBarry Smith   }
33808401ef6SPierre Jolivet   PetscCheck(nsends == count, comm, PETSC_ERR_SUP, "nsends %" PetscInt_FMT " != count %" PetscInt_FMT, nsends, count);
3391447629fSBarry Smith 
3401447629fSBarry Smith   /* wait on sends */
34148a46eb9SPierre Jolivet   if (nsends) PetscCallMPI(MPI_Waitall(nsends, send_waits, send_status));
3421447629fSBarry Smith 
3431447629fSBarry Smith   /* recvs */
3441447629fSBarry Smith   count = 0;
3451447629fSBarry Smith   for (j = nreceives; j > 0; j--) {
3469566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Waitany(nreceives, recv_waits, &widx, &recv_status));
3479566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, &nindices));
3481447629fSBarry Smith     rbuf = rindices + nmax * widx; /* global index */
3491447629fSBarry Smith 
3501447629fSBarry Smith     /* compute local mapping */
3511447629fSBarry Smith     for (i = 0; i < nindices; i += 2) {       /* pack aomap_loc */
3521447629fSBarry Smith       ip            = rbuf[i] - owners[rank]; /* local index */
3531447629fSBarry Smith       ia            = rbuf[i + 1];
3541447629fSBarry Smith       aomap_loc[ip] = ia;
3551447629fSBarry Smith     }
3561447629fSBarry Smith     count++;
3571447629fSBarry Smith   }
3581447629fSBarry Smith 
3599566063dSJacob Faibussowitsch   PetscCall(PetscFree(start));
3609566063dSJacob Faibussowitsch   PetscCall(PetscFree3(sindices, send_waits, send_status));
3619566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rindices, recv_waits));
3629566063dSJacob Faibussowitsch   PetscCall(PetscFree(owner));
3639566063dSJacob Faibussowitsch   PetscCall(PetscFree(sizes));
3641447629fSBarry Smith   PetscFunctionReturn(0);
3651447629fSBarry Smith }
3661447629fSBarry Smith 
3679371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode AOCreate_MemoryScalable(AO ao) {
3681447629fSBarry Smith   IS                 isapp = ao->isapp, ispetsc = ao->ispetsc;
3691447629fSBarry Smith   const PetscInt    *mypetsc, *myapp;
3701447629fSBarry Smith   PetscInt           napp, n_local, N, i, start, *petsc, *lens, *disp;
3711447629fSBarry Smith   MPI_Comm           comm;
3721447629fSBarry Smith   AO_MemoryScalable *aomems;
3731447629fSBarry Smith   PetscLayout        map;
3741447629fSBarry Smith   PetscMPIInt        size, rank;
3751447629fSBarry Smith 
3761447629fSBarry Smith   PetscFunctionBegin;
37728b400f6SJacob Faibussowitsch   PetscCheck(isapp, PetscObjectComm((PetscObject)ao), PETSC_ERR_ARG_WRONGSTATE, "AOSetIS() must be called before AOSetType()");
3781447629fSBarry Smith   /* create special struct aomems */
379*4dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&aomems));
3801447629fSBarry Smith   ao->data = (void *)aomems;
3819566063dSJacob Faibussowitsch   PetscCall(PetscMemcpy(ao->ops, &AOOps_MemoryScalable, sizeof(struct _AOOps)));
3829566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)ao, AOMEMORYSCALABLE));
3831447629fSBarry Smith 
3841447629fSBarry Smith   /* transmit all local lengths of isapp to all processors */
3859566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)isapp, &comm));
3869566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
3879566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3889566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(size, &lens, size, &disp));
3899566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(isapp, &napp));
3909566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Allgather(&napp, 1, MPIU_INT, lens, 1, MPIU_INT, comm));
3911447629fSBarry Smith 
3921447629fSBarry Smith   N = 0;
3931447629fSBarry Smith   for (i = 0; i < size; i++) {
3941447629fSBarry Smith     disp[i] = N;
3951447629fSBarry Smith     N += lens[i];
3961447629fSBarry Smith   }
3971447629fSBarry Smith 
3981447629fSBarry Smith   /* If ispetsc is 0 then use "natural" numbering */
3991447629fSBarry Smith   if (napp) {
4001447629fSBarry Smith     if (!ispetsc) {
4011447629fSBarry Smith       start = disp[rank];
4029566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(napp + 1, &petsc));
4031447629fSBarry Smith       for (i = 0; i < napp; i++) petsc[i] = start + i;
4041447629fSBarry Smith     } else {
4059566063dSJacob Faibussowitsch       PetscCall(ISGetIndices(ispetsc, &mypetsc));
4061447629fSBarry Smith       petsc = (PetscInt *)mypetsc;
4071447629fSBarry Smith     }
4084a2f8832SBarry Smith   } else {
4094a2f8832SBarry Smith     petsc = NULL;
4101447629fSBarry Smith   }
4111447629fSBarry Smith 
4121447629fSBarry Smith   /* create a map with global size N - used to determine the local sizes of ao - shall we use local napp instead of N? */
4139566063dSJacob Faibussowitsch   PetscCall(PetscLayoutCreate(comm, &map));
4141447629fSBarry Smith   map->bs = 1;
4151447629fSBarry Smith   map->N  = N;
4169566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(map));
4171447629fSBarry Smith 
4181447629fSBarry Smith   ao->N       = N;
4191447629fSBarry Smith   ao->n       = map->n;
4201447629fSBarry Smith   aomems->map = map;
4211447629fSBarry Smith 
4221447629fSBarry Smith   /* create distributed indices app_loc: petsc->app and petsc_loc: app->petsc */
4231447629fSBarry Smith   n_local = map->n;
4249566063dSJacob Faibussowitsch   PetscCall(PetscCalloc2(n_local, &aomems->app_loc, n_local, &aomems->petsc_loc));
4259566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isapp, &myapp));
4261447629fSBarry Smith 
4279566063dSJacob Faibussowitsch   PetscCall(AOCreateMemoryScalable_private(comm, napp, petsc, myapp, ao, aomems->app_loc));
4289566063dSJacob Faibussowitsch   PetscCall(AOCreateMemoryScalable_private(comm, napp, myapp, petsc, ao, aomems->petsc_loc));
4291447629fSBarry Smith 
4309566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isapp, &myapp));
4311447629fSBarry Smith   if (napp) {
4321447629fSBarry Smith     if (ispetsc) {
4339566063dSJacob Faibussowitsch       PetscCall(ISRestoreIndices(ispetsc, &mypetsc));
4341447629fSBarry Smith     } else {
4359566063dSJacob Faibussowitsch       PetscCall(PetscFree(petsc));
4361447629fSBarry Smith     }
4371447629fSBarry Smith   }
4389566063dSJacob Faibussowitsch   PetscCall(PetscFree2(lens, disp));
4391447629fSBarry Smith   PetscFunctionReturn(0);
4401447629fSBarry Smith }
4411447629fSBarry Smith 
4421447629fSBarry Smith /*@C
4431447629fSBarry Smith    AOCreateMemoryScalable - Creates a memory scalable application ordering using two integer arrays.
4441447629fSBarry Smith 
445d083f849SBarry Smith    Collective
4461447629fSBarry Smith 
4471447629fSBarry Smith    Input Parameters:
4481447629fSBarry Smith +  comm - MPI communicator that is to share AO
4491447629fSBarry Smith .  napp - size of integer arrays
4501447629fSBarry Smith .  myapp - integer array that defines an ordering
4511447629fSBarry Smith -  mypetsc - integer array that defines another ordering (may be NULL to
4521447629fSBarry Smith              indicate the natural ordering, that is 0,1,2,3,...)
4531447629fSBarry Smith 
4541447629fSBarry Smith    Output Parameter:
4551447629fSBarry Smith .  aoout - the new application ordering
4561447629fSBarry Smith 
4571447629fSBarry Smith    Level: beginner
4581447629fSBarry Smith 
45995452b02SPatrick Sanan     Notes:
46095452b02SPatrick Sanan     The arrays myapp and mypetsc must contain the all the integers 0 to napp-1 with no duplicates; that is there cannot be any "holes"
4611447629fSBarry Smith            in the indices. Use AOCreateMapping() or AOCreateMappingIS() if you wish to have "holes" in the indices.
4621447629fSBarry Smith            Comparing with AOCreateBasic(), this routine trades memory with message communication.
4631447629fSBarry Smith 
464db781477SPatrick Sanan .seealso: `AOCreateMemoryScalableIS()`, `AODestroy()`, `AOPetscToApplication()`, `AOApplicationToPetsc()`
4651447629fSBarry Smith @*/
4669371c9d4SSatish Balay PetscErrorCode AOCreateMemoryScalable(MPI_Comm comm, PetscInt napp, const PetscInt myapp[], const PetscInt mypetsc[], AO *aoout) {
4671447629fSBarry Smith   IS              isapp, ispetsc;
4681447629fSBarry Smith   const PetscInt *app = myapp, *petsc = mypetsc;
4691447629fSBarry Smith 
4701447629fSBarry Smith   PetscFunctionBegin;
4719566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(comm, napp, app, PETSC_USE_POINTER, &isapp));
4721447629fSBarry Smith   if (mypetsc) {
4739566063dSJacob Faibussowitsch     PetscCall(ISCreateGeneral(comm, napp, petsc, PETSC_USE_POINTER, &ispetsc));
4741447629fSBarry Smith   } else {
4751447629fSBarry Smith     ispetsc = NULL;
4761447629fSBarry Smith   }
4779566063dSJacob Faibussowitsch   PetscCall(AOCreateMemoryScalableIS(isapp, ispetsc, aoout));
4789566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&isapp));
47948a46eb9SPierre Jolivet   if (mypetsc) PetscCall(ISDestroy(&ispetsc));
4801447629fSBarry Smith   PetscFunctionReturn(0);
4811447629fSBarry Smith }
4821447629fSBarry Smith 
4831447629fSBarry Smith /*@C
4841447629fSBarry Smith    AOCreateMemoryScalableIS - Creates a memory scalable application ordering using two index sets.
4851447629fSBarry Smith 
4861447629fSBarry Smith    Collective on IS
4871447629fSBarry Smith 
4881447629fSBarry Smith    Input Parameters:
4891447629fSBarry Smith +  isapp - index set that defines an ordering
4901447629fSBarry Smith -  ispetsc - index set that defines another ordering (may be NULL to use the
4911447629fSBarry Smith              natural ordering)
4921447629fSBarry Smith 
4931447629fSBarry Smith    Output Parameter:
4941447629fSBarry Smith .  aoout - the new application ordering
4951447629fSBarry Smith 
4961447629fSBarry Smith    Level: beginner
4971447629fSBarry Smith 
49895452b02SPatrick Sanan     Notes:
49995452b02SPatrick Sanan     The index sets isapp and ispetsc must contain the all the integers 0 to napp-1 (where napp is the length of the index sets) with no duplicates;
5001447629fSBarry Smith            that is there cannot be any "holes".
5011447629fSBarry Smith            Comparing with AOCreateBasicIS(), this routine trades memory with message communication.
502db781477SPatrick Sanan .seealso: `AOCreateMemoryScalable()`, `AODestroy()`
5031447629fSBarry Smith @*/
5049371c9d4SSatish Balay PetscErrorCode AOCreateMemoryScalableIS(IS isapp, IS ispetsc, AO *aoout) {
5051447629fSBarry Smith   MPI_Comm comm;
5061447629fSBarry Smith   AO       ao;
5071447629fSBarry Smith 
5081447629fSBarry Smith   PetscFunctionBegin;
5099566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)isapp, &comm));
5109566063dSJacob Faibussowitsch   PetscCall(AOCreate(comm, &ao));
5119566063dSJacob Faibussowitsch   PetscCall(AOSetIS(ao, isapp, ispetsc));
5129566063dSJacob Faibussowitsch   PetscCall(AOSetType(ao, AOMEMORYSCALABLE));
5139566063dSJacob Faibussowitsch   PetscCall(AOViewFromOptions(ao, NULL, "-ao_view"));
5141447629fSBarry Smith   *aoout = ao;
5151447629fSBarry Smith   PetscFunctionReturn(0);
5161447629fSBarry Smith }
517