xref: /petsc/src/vec/is/ao/impls/memscalable/aomemscalable.c (revision 28b400f66ebc7ae0049166a2294dfcd3df27e64b)
11447629fSBarry Smith 
21447629fSBarry Smith /*
31447629fSBarry Smith     The memory scalable AO application ordering routines. These store the
46bd6ae52SBarry Smith   orderings on each processor for that processor's range of values
51447629fSBarry Smith */
61447629fSBarry Smith 
71447629fSBarry Smith #include <../src/vec/is/ao/aoimpl.h>          /*I  "petscao.h"   I*/
81447629fSBarry Smith 
91447629fSBarry Smith typedef struct {
101447629fSBarry Smith   PetscInt    *app_loc;    /* app_loc[i] is the partner for the ith local PETSc slot */
111447629fSBarry Smith   PetscInt    *petsc_loc;  /* petsc_loc[j] is the partner for the jth local app slot */
121447629fSBarry Smith   PetscLayout map;         /* determines the local sizes of ao */
131447629fSBarry Smith } AO_MemoryScalable;
141447629fSBarry Smith 
151447629fSBarry Smith /*
166bd6ae52SBarry Smith        All processors ship the data to process 0 to be printed; note that this is not scalable because
176bd6ae52SBarry Smith        process 0 allocates space for all the orderings entry across all the processes
181447629fSBarry Smith */
191447629fSBarry Smith PetscErrorCode AOView_MemoryScalable(AO ao,PetscViewer viewer)
201447629fSBarry Smith {
211447629fSBarry Smith   PetscMPIInt       rank,size;
221447629fSBarry Smith   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
231447629fSBarry Smith   PetscBool         iascii;
241447629fSBarry Smith   PetscMPIInt       tag_app,tag_petsc;
251447629fSBarry Smith   PetscLayout       map = aomems->map;
261447629fSBarry Smith   PetscInt          *app,*app_loc,*petsc,*petsc_loc,len,i,j;
271447629fSBarry Smith   MPI_Status        status;
281447629fSBarry Smith 
291447629fSBarry Smith   PetscFunctionBegin;
305f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
31*28b400f6SJacob Faibussowitsch   PetscCheck(iascii,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not supported for AO MemoryScalable",((PetscObject)viewer)->type_name);
321447629fSBarry Smith 
335f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)ao),&rank));
345f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)ao),&size));
351447629fSBarry Smith 
365f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectGetNewTag((PetscObject)ao,&tag_app));
375f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectGetNewTag((PetscObject)ao,&tag_petsc));
381447629fSBarry Smith 
39dd400576SPatrick Sanan   if (rank == 0) {
405f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscViewerASCIIPrintf(viewer,"Number of elements in ordering %" PetscInt_FMT "\n",ao->N));
415f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscViewerASCIIPrintf(viewer,  "PETSc->App  App->PETSc\n"));
421447629fSBarry Smith 
435f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscMalloc2(map->N,&app,map->N,&petsc));
441447629fSBarry Smith     len  = map->n;
451447629fSBarry Smith     /* print local AO */
465f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscViewerASCIIPrintf(viewer,"Process [%d]\n",rank));
471447629fSBarry Smith     for (i=0; i<len; i++) {
485f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscViewerASCIIPrintf(viewer,"%3" PetscInt_FMT "  %3" PetscInt_FMT "    %3" PetscInt_FMT "  %3" PetscInt_FMT "\n",i,aomems->app_loc[i],i,aomems->petsc_loc[i]));
491447629fSBarry Smith     }
501447629fSBarry Smith 
511447629fSBarry Smith     /* recv and print off-processor's AO */
521447629fSBarry Smith     for (i=1; i<size; i++) {
531447629fSBarry Smith       len       = map->range[i+1] - map->range[i];
541447629fSBarry Smith       app_loc   = app  + map->range[i];
551447629fSBarry Smith       petsc_loc = petsc+ map->range[i];
565f80ce2aSJacob Faibussowitsch       CHKERRMPI(MPI_Recv(app_loc,(PetscMPIInt)len,MPIU_INT,i,tag_app,PetscObjectComm((PetscObject)ao),&status));
575f80ce2aSJacob Faibussowitsch       CHKERRMPI(MPI_Recv(petsc_loc,(PetscMPIInt)len,MPIU_INT,i,tag_petsc,PetscObjectComm((PetscObject)ao),&status));
585f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscViewerASCIIPrintf(viewer,"Process [%" PetscInt_FMT "]\n",i));
591447629fSBarry Smith       for (j=0; j<len; j++) {
605f80ce2aSJacob Faibussowitsch         CHKERRQ(PetscViewerASCIIPrintf(viewer,"%3" PetscInt_FMT "  %3" PetscInt_FMT "    %3" PetscInt_FMT "  %3" PetscInt_FMT "\n",map->range[i]+j,app_loc[j],map->range[i]+j,petsc_loc[j]));
611447629fSBarry Smith       }
621447629fSBarry Smith     }
635f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscFree2(app,petsc));
641447629fSBarry Smith 
651447629fSBarry Smith   } else {
661447629fSBarry Smith     /* send values */
675f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Send((void*)aomems->app_loc,map->n,MPIU_INT,0,tag_app,PetscObjectComm((PetscObject)ao)));
685f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Send((void*)aomems->petsc_loc,map->n,MPIU_INT,0,tag_petsc,PetscObjectComm((PetscObject)ao)));
691447629fSBarry Smith   }
705f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscViewerFlush(viewer));
711447629fSBarry Smith   PetscFunctionReturn(0);
721447629fSBarry Smith }
731447629fSBarry Smith 
741447629fSBarry Smith PetscErrorCode AODestroy_MemoryScalable(AO ao)
751447629fSBarry Smith {
761447629fSBarry Smith   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
771447629fSBarry Smith 
781447629fSBarry Smith   PetscFunctionBegin;
795f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree2(aomems->app_loc,aomems->petsc_loc));
805f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscLayoutDestroy(&aomems->map));
815f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(aomems));
821447629fSBarry Smith   PetscFunctionReturn(0);
831447629fSBarry Smith }
841447629fSBarry Smith 
851447629fSBarry Smith /*
861447629fSBarry Smith    Input Parameters:
871447629fSBarry Smith +   ao - the application ordering context
881447629fSBarry Smith .   n  - the number of integers in ia[]
891447629fSBarry Smith .   ia - the integers; these are replaced with their mapped value
901447629fSBarry Smith -   maploc - app_loc or petsc_loc in struct "AO_MemoryScalable"
911447629fSBarry Smith 
921447629fSBarry Smith    Output Parameter:
931447629fSBarry Smith .   ia - the mapped interges
941447629fSBarry Smith  */
956bd6ae52SBarry Smith PetscErrorCode AOMap_MemoryScalable_private(AO ao,PetscInt n,PetscInt *ia,const PetscInt *maploc)
961447629fSBarry Smith {
971447629fSBarry Smith   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
981447629fSBarry Smith   MPI_Comm          comm;
991447629fSBarry Smith   PetscMPIInt       rank,size,tag1,tag2;
10076ec1555SBarry Smith   PetscInt          *owner,*start,*sizes,nsends,nreceives;
1011447629fSBarry Smith   PetscInt          nmax,count,*sindices,*rindices,i,j,idx,lastidx,*sindices2,*rindices2;
1026bd6ae52SBarry Smith   const PetscInt    *owners = aomems->map->range;
1031447629fSBarry Smith   MPI_Request       *send_waits,*recv_waits,*send_waits2,*recv_waits2;
1041447629fSBarry Smith   MPI_Status        recv_status;
1051447629fSBarry Smith   PetscMPIInt       nindices,source,widx;
1061447629fSBarry Smith   PetscInt          *rbuf,*sbuf;
1071447629fSBarry Smith   MPI_Status        *send_status,*send_status2;
1081447629fSBarry Smith 
1091447629fSBarry Smith   PetscFunctionBegin;
1105f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectGetComm((PetscObject)ao,&comm));
1115f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Comm_rank(comm,&rank));
1125f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Comm_size(comm,&size));
1131447629fSBarry Smith 
1141447629fSBarry Smith   /*  first count number of contributors to each processor */
1155f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc1(size,&start));
1165f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscCalloc2(2*size,&sizes,n,&owner));
1171447629fSBarry Smith 
1181447629fSBarry Smith   j       = 0;
1191447629fSBarry Smith   lastidx = -1;
1201447629fSBarry Smith   for (i=0; i<n; i++) {
1216bd6ae52SBarry Smith     if (ia[i] < 0) owner[i] = -1; /* mark negative entries (which are not to be mapped) with a special negative value */
1226bd6ae52SBarry Smith     if (ia[i] >= ao->N) owner[i] = -2; /* mark out of range entries with special negative value */
1236bd6ae52SBarry Smith     else {
1241447629fSBarry Smith       /* if indices are NOT locally sorted, need to start search at the beginning */
1251447629fSBarry Smith       if (lastidx > (idx = ia[i])) j = 0;
1261447629fSBarry Smith       lastidx = idx;
1271447629fSBarry Smith       for (; j<size; j++) {
1281447629fSBarry Smith         if (idx >= owners[j] && idx < owners[j+1]) {
12976ec1555SBarry Smith           sizes[2*j]++;     /* num of indices to be sent */
13076ec1555SBarry Smith           sizes[2*j+1] = 1; /* send to proc[j] */
1311447629fSBarry Smith           owner[i]     = j;
1321447629fSBarry Smith           break;
1331447629fSBarry Smith         }
1341447629fSBarry Smith       }
1351447629fSBarry Smith     }
1366bd6ae52SBarry Smith   }
13776ec1555SBarry Smith   sizes[2*rank]=sizes[2*rank+1]=0; /* do not receive from self! */
1381447629fSBarry Smith   nsends        = 0;
13976ec1555SBarry Smith   for (i=0; i<size; i++) nsends += sizes[2*i+1];
1401447629fSBarry Smith 
1411447629fSBarry Smith   /* inform other processors of number of messages and max length*/
1425f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMaxSum(comm,sizes,&nmax,&nreceives));
1431447629fSBarry Smith 
1441447629fSBarry Smith   /* allocate arrays */
1455f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectGetNewTag((PetscObject)ao,&tag1));
1465f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectGetNewTag((PetscObject)ao,&tag2));
1471447629fSBarry Smith 
1485f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc2(nreceives*nmax,&rindices,nreceives,&recv_waits));
1495f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc2(nsends*nmax,&rindices2,nsends,&recv_waits2));
1501447629fSBarry Smith 
1515f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc3(n,&sindices,nsends,&send_waits,nsends,&send_status));
1525f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc3(n,&sindices2,nreceives,&send_waits2,nreceives,&send_status2));
1531447629fSBarry Smith 
1541447629fSBarry Smith   /* post 1st receives: receive others requests
1551447629fSBarry Smith      since we don't know how long each individual message is we
1561447629fSBarry Smith      allocate the largest needed buffer for each receive. Potentially
1571447629fSBarry Smith      this is a lot of wasted space.
1581447629fSBarry Smith   */
1591447629fSBarry Smith   for (i=0,count=0; i<nreceives; i++) {
1605f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++));
1611447629fSBarry Smith   }
1621447629fSBarry Smith 
1631447629fSBarry Smith   /* do 1st sends:
1641447629fSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
1651447629fSBarry Smith          the ith processor
1661447629fSBarry Smith   */
1671447629fSBarry Smith   start[0] = 0;
16876ec1555SBarry Smith   for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
1691447629fSBarry Smith   for (i=0; i<n; i++) {
1701447629fSBarry Smith     j = owner[i];
1716bd6ae52SBarry Smith     if (j == -1) continue; /* do not remap negative entries in ia[] */
1726bd6ae52SBarry Smith     else if (j == -2) { /* out of range entries get mapped to -1 */
1736bd6ae52SBarry Smith       ia[i] = -1;
1746bd6ae52SBarry Smith       continue;
1756bd6ae52SBarry Smith     } else if (j != rank) {
1761447629fSBarry Smith       sindices[start[j]++]  = ia[i];
1771447629fSBarry Smith     } else { /* compute my own map */
1781447629fSBarry Smith       ia[i] = maploc[ia[i]-owners[rank]];
1791447629fSBarry Smith     }
1801447629fSBarry Smith   }
1811447629fSBarry Smith 
1821447629fSBarry Smith   start[0] = 0;
18376ec1555SBarry Smith   for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
1841447629fSBarry Smith   for (i=0,count=0; i<size; i++) {
18576ec1555SBarry Smith     if (sizes[2*i+1]) {
1861447629fSBarry Smith       /* send my request to others */
1875f80ce2aSJacob Faibussowitsch       CHKERRMPI(MPI_Isend(sindices+start[i],sizes[2*i],MPIU_INT,i,tag1,comm,send_waits+count));
1881447629fSBarry Smith       /* post receive for the answer of my request */
1895f80ce2aSJacob Faibussowitsch       CHKERRMPI(MPI_Irecv(sindices2+start[i],sizes[2*i],MPIU_INT,i,tag2,comm,recv_waits2+count));
1901447629fSBarry Smith       count++;
1911447629fSBarry Smith     }
1921447629fSBarry Smith   }
1932c71b3e2SJacob Faibussowitsch   PetscCheckFalse(nsends != count,comm,PETSC_ERR_SUP,"nsends %" PetscInt_FMT " != count %" PetscInt_FMT,nsends,count);
1941447629fSBarry Smith 
1951447629fSBarry Smith   /* wait on 1st sends */
1961447629fSBarry Smith   if (nsends) {
1975f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Waitall(nsends,send_waits,send_status));
1981447629fSBarry Smith   }
1991447629fSBarry Smith 
2001447629fSBarry Smith   /* 1st recvs: other's requests */
2011447629fSBarry Smith   for (j=0; j< nreceives; j++) {
2025f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Waitany(nreceives,recv_waits,&widx,&recv_status)); /* idx: index of handle for operation that completed */
2035f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Get_count(&recv_status,MPIU_INT,&nindices));
2041447629fSBarry Smith     rbuf   = rindices+nmax*widx; /* global index */
2051447629fSBarry Smith     source = recv_status.MPI_SOURCE;
2061447629fSBarry Smith 
2071447629fSBarry Smith     /* compute mapping */
2081447629fSBarry Smith     sbuf = rbuf;
2091447629fSBarry Smith     for (i=0; i<nindices; i++) sbuf[i] = maploc[rbuf[i]-owners[rank]];
2101447629fSBarry Smith 
2111447629fSBarry Smith     /* send mapping back to the sender */
2125f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Isend(sbuf,nindices,MPIU_INT,source,tag2,comm,send_waits2+widx));
2131447629fSBarry Smith   }
2141447629fSBarry Smith 
2151447629fSBarry Smith   /* wait on 2nd sends */
2161447629fSBarry Smith   if (nreceives) {
2175f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Waitall(nreceives,send_waits2,send_status2));
2181447629fSBarry Smith   }
2191447629fSBarry Smith 
2201447629fSBarry Smith   /* 2nd recvs: for the answer of my request */
2211447629fSBarry Smith   for (j=0; j< nsends; j++) {
2225f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Waitany(nsends,recv_waits2,&widx,&recv_status));
2235f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Get_count(&recv_status,MPIU_INT,&nindices));
2241447629fSBarry Smith     source = recv_status.MPI_SOURCE;
2251447629fSBarry Smith     /* pack output ia[] */
2261447629fSBarry Smith     rbuf  = sindices2+start[source];
2271447629fSBarry Smith     count = 0;
2281447629fSBarry Smith     for (i=0; i<n; i++) {
2291447629fSBarry Smith       if (source == owner[i]) ia[i] = rbuf[count++];
2301447629fSBarry Smith     }
2311447629fSBarry Smith   }
2321447629fSBarry Smith 
2331447629fSBarry Smith   /* free arrays */
2345f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(start));
2355f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree2(sizes,owner));
2365f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree2(rindices,recv_waits));
2375f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree2(rindices2,recv_waits2));
2385f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree3(sindices,send_waits,send_status));
2395f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree3(sindices2,send_waits2,send_status2));
2401447629fSBarry Smith   PetscFunctionReturn(0);
2411447629fSBarry Smith }
2421447629fSBarry Smith 
2431447629fSBarry Smith PetscErrorCode AOPetscToApplication_MemoryScalable(AO ao,PetscInt n,PetscInt *ia)
2441447629fSBarry Smith {
2451447629fSBarry Smith   AO_MemoryScalable *aomems  = (AO_MemoryScalable*)ao->data;
2461447629fSBarry Smith   PetscInt          *app_loc = aomems->app_loc;
2471447629fSBarry Smith 
2481447629fSBarry Smith   PetscFunctionBegin;
2495f80ce2aSJacob Faibussowitsch   CHKERRQ(AOMap_MemoryScalable_private(ao,n,ia,app_loc));
2501447629fSBarry Smith   PetscFunctionReturn(0);
2511447629fSBarry Smith }
2521447629fSBarry Smith 
2531447629fSBarry Smith PetscErrorCode AOApplicationToPetsc_MemoryScalable(AO ao,PetscInt n,PetscInt *ia)
2541447629fSBarry Smith {
2551447629fSBarry Smith   AO_MemoryScalable *aomems    = (AO_MemoryScalable*)ao->data;
2561447629fSBarry Smith   PetscInt          *petsc_loc = aomems->petsc_loc;
2571447629fSBarry Smith 
2581447629fSBarry Smith   PetscFunctionBegin;
2595f80ce2aSJacob Faibussowitsch   CHKERRQ(AOMap_MemoryScalable_private(ao,n,ia,petsc_loc));
2601447629fSBarry Smith   PetscFunctionReturn(0);
2611447629fSBarry Smith }
2621447629fSBarry Smith 
2631447629fSBarry Smith static struct _AOOps AOOps_MemoryScalable = {
264267267bdSJacob Faibussowitsch   PetscDesignatedInitializer(view,AOView_MemoryScalable),
265267267bdSJacob Faibussowitsch   PetscDesignatedInitializer(destroy,AODestroy_MemoryScalable),
266267267bdSJacob Faibussowitsch   PetscDesignatedInitializer(petsctoapplication,AOPetscToApplication_MemoryScalable),
267267267bdSJacob Faibussowitsch   PetscDesignatedInitializer(applicationtopetsc,AOApplicationToPetsc_MemoryScalable),
2681447629fSBarry Smith };
2691447629fSBarry Smith 
2701447629fSBarry Smith PetscErrorCode  AOCreateMemoryScalable_private(MPI_Comm comm,PetscInt napp,const PetscInt from_array[],const PetscInt to_array[],AO ao, PetscInt *aomap_loc)
2711447629fSBarry Smith {
2721447629fSBarry Smith   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
2731447629fSBarry Smith   PetscLayout       map     = aomems->map;
2741447629fSBarry Smith   PetscInt          n_local = map->n,i,j;
2751447629fSBarry Smith   PetscMPIInt       rank,size,tag;
27676ec1555SBarry Smith   PetscInt          *owner,*start,*sizes,nsends,nreceives;
2771447629fSBarry Smith   PetscInt          nmax,count,*sindices,*rindices,idx,lastidx;
2781447629fSBarry Smith   PetscInt          *owners = aomems->map->range;
2791447629fSBarry Smith   MPI_Request       *send_waits,*recv_waits;
2801447629fSBarry Smith   MPI_Status        recv_status;
2811447629fSBarry Smith   PetscMPIInt       nindices,widx;
2821447629fSBarry Smith   PetscInt          *rbuf;
2831447629fSBarry Smith   PetscInt          n=napp,ip,ia;
2841447629fSBarry Smith   MPI_Status        *send_status;
2851447629fSBarry Smith 
2861447629fSBarry Smith   PetscFunctionBegin;
2875f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscArrayzero(aomap_loc,n_local));
2881447629fSBarry Smith 
2895f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Comm_rank(comm,&rank));
2905f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Comm_size(comm,&size));
2911447629fSBarry Smith 
2921447629fSBarry Smith   /*  first count number of contributors (of from_array[]) to each processor */
2935f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscCalloc1(2*size,&sizes));
2945f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc1(n,&owner));
2951447629fSBarry Smith 
2961447629fSBarry Smith   j       = 0;
2971447629fSBarry Smith   lastidx = -1;
2981447629fSBarry Smith   for (i=0; i<n; i++) {
2991447629fSBarry Smith     /* if indices are NOT locally sorted, need to start search at the beginning */
3001447629fSBarry Smith     if (lastidx > (idx = from_array[i])) j = 0;
3011447629fSBarry Smith     lastidx = idx;
3021447629fSBarry Smith     for (; j<size; j++) {
3031447629fSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
30476ec1555SBarry Smith         sizes[2*j]  += 2; /* num of indices to be sent - in pairs (ip,ia) */
30576ec1555SBarry Smith         sizes[2*j+1] = 1; /* send to proc[j] */
3061447629fSBarry Smith         owner[i]     = j;
3071447629fSBarry Smith         break;
3081447629fSBarry Smith       }
3091447629fSBarry Smith     }
3101447629fSBarry Smith   }
31176ec1555SBarry Smith   sizes[2*rank]=sizes[2*rank+1]=0; /* do not receive from self! */
3121447629fSBarry Smith   nsends        = 0;
31376ec1555SBarry Smith   for (i=0; i<size; i++) nsends += sizes[2*i+1];
3141447629fSBarry Smith 
3151447629fSBarry Smith   /* inform other processors of number of messages and max length*/
3165f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMaxSum(comm,sizes,&nmax,&nreceives));
3171447629fSBarry Smith 
3181447629fSBarry Smith   /* allocate arrays */
3195f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectGetNewTag((PetscObject)ao,&tag));
3205f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc2(nreceives*nmax,&rindices,nreceives,&recv_waits));
3215f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc3(2*n,&sindices,nsends,&send_waits,nsends,&send_status));
3225f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc1(size,&start));
3231447629fSBarry Smith 
3241447629fSBarry Smith   /* post receives: */
3251447629fSBarry Smith   for (i=0; i<nreceives; i++) {
3265f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i));
3271447629fSBarry Smith   }
3281447629fSBarry Smith 
3291447629fSBarry Smith   /* do sends:
3301447629fSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3311447629fSBarry Smith          the ith processor
3321447629fSBarry Smith   */
3331447629fSBarry Smith   start[0] = 0;
33476ec1555SBarry Smith   for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
3351447629fSBarry Smith   for (i=0; i<n; i++) {
3361447629fSBarry Smith     j = owner[i];
3371447629fSBarry Smith     if (j != rank) {
3381447629fSBarry Smith       ip                   = from_array[i];
3391447629fSBarry Smith       ia                   = to_array[i];
3401447629fSBarry Smith       sindices[start[j]++] = ip;
3411447629fSBarry Smith       sindices[start[j]++] = ia;
3421447629fSBarry Smith     } else { /* compute my own map */
3431447629fSBarry Smith       ip            = from_array[i] - owners[rank];
3441447629fSBarry Smith       ia            = to_array[i];
3451447629fSBarry Smith       aomap_loc[ip] = ia;
3461447629fSBarry Smith     }
3471447629fSBarry Smith   }
3481447629fSBarry Smith 
3491447629fSBarry Smith   start[0] = 0;
35076ec1555SBarry Smith   for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
3511447629fSBarry Smith   for (i=0,count=0; i<size; i++) {
35276ec1555SBarry Smith     if (sizes[2*i+1]) {
3535f80ce2aSJacob Faibussowitsch       CHKERRMPI(MPI_Isend(sindices+start[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count));
3541447629fSBarry Smith       count++;
3551447629fSBarry Smith     }
3561447629fSBarry Smith   }
3572c71b3e2SJacob Faibussowitsch   PetscCheckFalse(nsends != count,comm,PETSC_ERR_SUP,"nsends %" PetscInt_FMT " != count %" PetscInt_FMT,nsends,count);
3581447629fSBarry Smith 
3591447629fSBarry Smith   /* wait on sends */
3601447629fSBarry Smith   if (nsends) {
3615f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Waitall(nsends,send_waits,send_status));
3621447629fSBarry Smith   }
3631447629fSBarry Smith 
3641447629fSBarry Smith   /* recvs */
3651447629fSBarry Smith   count=0;
3661447629fSBarry Smith   for (j= nreceives; j>0; j--) {
3675f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Waitany(nreceives,recv_waits,&widx,&recv_status));
3685f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Get_count(&recv_status,MPIU_INT,&nindices));
3691447629fSBarry Smith     rbuf = rindices+nmax*widx; /* global index */
3701447629fSBarry Smith 
3711447629fSBarry Smith     /* compute local mapping */
3721447629fSBarry Smith     for (i=0; i<nindices; i+=2) { /* pack aomap_loc */
3731447629fSBarry Smith       ip            = rbuf[i] - owners[rank]; /* local index */
3741447629fSBarry Smith       ia            = rbuf[i+1];
3751447629fSBarry Smith       aomap_loc[ip] = ia;
3761447629fSBarry Smith     }
3771447629fSBarry Smith     count++;
3781447629fSBarry Smith   }
3791447629fSBarry Smith 
3805f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(start));
3815f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree3(sindices,send_waits,send_status));
3825f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree2(rindices,recv_waits));
3835f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(owner));
3845f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(sizes));
3851447629fSBarry Smith   PetscFunctionReturn(0);
3861447629fSBarry Smith }
3871447629fSBarry Smith 
3888cc058d9SJed Brown PETSC_EXTERN PetscErrorCode AOCreate_MemoryScalable(AO ao)
3891447629fSBarry Smith {
3901447629fSBarry Smith   IS                isapp=ao->isapp,ispetsc=ao->ispetsc;
3911447629fSBarry Smith   const PetscInt    *mypetsc,*myapp;
3921447629fSBarry Smith   PetscInt          napp,n_local,N,i,start,*petsc,*lens,*disp;
3931447629fSBarry Smith   MPI_Comm          comm;
3941447629fSBarry Smith   AO_MemoryScalable *aomems;
3951447629fSBarry Smith   PetscLayout       map;
3961447629fSBarry Smith   PetscMPIInt       size,rank;
3971447629fSBarry Smith 
3981447629fSBarry Smith   PetscFunctionBegin;
399*28b400f6SJacob Faibussowitsch   PetscCheck(isapp,PetscObjectComm((PetscObject)ao),PETSC_ERR_ARG_WRONGSTATE,"AOSetIS() must be called before AOSetType()");
4001447629fSBarry Smith   /* create special struct aomems */
4015f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscNewLog(ao,&aomems));
4021447629fSBarry Smith   ao->data = (void*) aomems;
4035f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMemcpy(ao->ops,&AOOps_MemoryScalable,sizeof(struct _AOOps)));
4045f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectChangeTypeName((PetscObject)ao,AOMEMORYSCALABLE));
4051447629fSBarry Smith 
4061447629fSBarry Smith   /* transmit all local lengths of isapp to all processors */
4075f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectGetComm((PetscObject)isapp,&comm));
4085f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Comm_size(comm, &size));
4095f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Comm_rank(comm, &rank));
4105f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc2(size,&lens,size,&disp));
4115f80ce2aSJacob Faibussowitsch   CHKERRQ(ISGetLocalSize(isapp,&napp));
4125f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Allgather(&napp, 1, MPIU_INT, lens, 1, MPIU_INT, comm));
4131447629fSBarry Smith 
4141447629fSBarry Smith   N = 0;
4151447629fSBarry Smith   for (i = 0; i < size; i++) {
4161447629fSBarry Smith     disp[i] = N;
4171447629fSBarry Smith     N      += lens[i];
4181447629fSBarry Smith   }
4191447629fSBarry Smith 
4201447629fSBarry Smith   /* If ispetsc is 0 then use "natural" numbering */
4211447629fSBarry Smith   if (napp) {
4221447629fSBarry Smith     if (!ispetsc) {
4231447629fSBarry Smith       start = disp[rank];
4245f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscMalloc1(napp+1, &petsc));
4251447629fSBarry Smith       for (i=0; i<napp; i++) petsc[i] = start + i;
4261447629fSBarry Smith     } else {
4275f80ce2aSJacob Faibussowitsch       CHKERRQ(ISGetIndices(ispetsc,&mypetsc));
4281447629fSBarry Smith       petsc = (PetscInt*)mypetsc;
4291447629fSBarry Smith     }
4304a2f8832SBarry Smith   } else {
4314a2f8832SBarry Smith     petsc = NULL;
4321447629fSBarry Smith   }
4331447629fSBarry Smith 
4341447629fSBarry Smith   /* create a map with global size N - used to determine the local sizes of ao - shall we use local napp instead of N? */
4355f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscLayoutCreate(comm,&map));
4361447629fSBarry Smith   map->bs = 1;
4371447629fSBarry Smith   map->N  = N;
4385f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscLayoutSetUp(map));
4391447629fSBarry Smith 
4401447629fSBarry Smith   ao->N       = N;
4411447629fSBarry Smith   ao->n       = map->n;
4421447629fSBarry Smith   aomems->map = map;
4431447629fSBarry Smith 
4441447629fSBarry Smith   /* create distributed indices app_loc: petsc->app and petsc_loc: app->petsc */
4451447629fSBarry Smith   n_local = map->n;
4465f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscCalloc2(n_local, &aomems->app_loc,n_local,&aomems->petsc_loc));
4475f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscLogObjectMemory((PetscObject)ao,2*n_local*sizeof(PetscInt)));
4485f80ce2aSJacob Faibussowitsch   CHKERRQ(ISGetIndices(isapp,&myapp));
4491447629fSBarry Smith 
4505f80ce2aSJacob Faibussowitsch   CHKERRQ(AOCreateMemoryScalable_private(comm,napp,petsc,myapp,ao,aomems->app_loc));
4515f80ce2aSJacob Faibussowitsch   CHKERRQ(AOCreateMemoryScalable_private(comm,napp,myapp,petsc,ao,aomems->petsc_loc));
4521447629fSBarry Smith 
4535f80ce2aSJacob Faibussowitsch   CHKERRQ(ISRestoreIndices(isapp,&myapp));
4541447629fSBarry Smith   if (napp) {
4551447629fSBarry Smith     if (ispetsc) {
4565f80ce2aSJacob Faibussowitsch       CHKERRQ(ISRestoreIndices(ispetsc,&mypetsc));
4571447629fSBarry Smith     } else {
4585f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscFree(petsc));
4591447629fSBarry Smith     }
4601447629fSBarry Smith   }
4615f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree2(lens,disp));
4621447629fSBarry Smith   PetscFunctionReturn(0);
4631447629fSBarry Smith }
4641447629fSBarry Smith 
4651447629fSBarry Smith /*@C
4661447629fSBarry Smith    AOCreateMemoryScalable - Creates a memory scalable application ordering using two integer arrays.
4671447629fSBarry Smith 
468d083f849SBarry Smith    Collective
4691447629fSBarry Smith 
4701447629fSBarry Smith    Input Parameters:
4711447629fSBarry Smith +  comm - MPI communicator that is to share AO
4721447629fSBarry Smith .  napp - size of integer arrays
4731447629fSBarry Smith .  myapp - integer array that defines an ordering
4741447629fSBarry Smith -  mypetsc - integer array that defines another ordering (may be NULL to
4751447629fSBarry Smith              indicate the natural ordering, that is 0,1,2,3,...)
4761447629fSBarry Smith 
4771447629fSBarry Smith    Output Parameter:
4781447629fSBarry Smith .  aoout - the new application ordering
4791447629fSBarry Smith 
4801447629fSBarry Smith    Level: beginner
4811447629fSBarry Smith 
48295452b02SPatrick Sanan     Notes:
48395452b02SPatrick Sanan     The arrays myapp and mypetsc must contain the all the integers 0 to napp-1 with no duplicates; that is there cannot be any "holes"
4841447629fSBarry Smith            in the indices. Use AOCreateMapping() or AOCreateMappingIS() if you wish to have "holes" in the indices.
4851447629fSBarry Smith            Comparing with AOCreateBasic(), this routine trades memory with message communication.
4861447629fSBarry Smith 
4871447629fSBarry Smith .seealso: AOCreateMemoryScalableIS(), AODestroy(), AOPetscToApplication(), AOApplicationToPetsc()
4881447629fSBarry Smith @*/
4891447629fSBarry Smith PetscErrorCode AOCreateMemoryScalable(MPI_Comm comm,PetscInt napp,const PetscInt myapp[],const PetscInt mypetsc[],AO *aoout)
4901447629fSBarry Smith {
4911447629fSBarry Smith   IS             isapp,ispetsc;
4921447629fSBarry Smith   const PetscInt *app=myapp,*petsc=mypetsc;
4931447629fSBarry Smith 
4941447629fSBarry Smith   PetscFunctionBegin;
4955f80ce2aSJacob Faibussowitsch   CHKERRQ(ISCreateGeneral(comm,napp,app,PETSC_USE_POINTER,&isapp));
4961447629fSBarry Smith   if (mypetsc) {
4975f80ce2aSJacob Faibussowitsch     CHKERRQ(ISCreateGeneral(comm,napp,petsc,PETSC_USE_POINTER,&ispetsc));
4981447629fSBarry Smith   } else {
4991447629fSBarry Smith     ispetsc = NULL;
5001447629fSBarry Smith   }
5015f80ce2aSJacob Faibussowitsch   CHKERRQ(AOCreateMemoryScalableIS(isapp,ispetsc,aoout));
5025f80ce2aSJacob Faibussowitsch   CHKERRQ(ISDestroy(&isapp));
5031447629fSBarry Smith   if (mypetsc) {
5045f80ce2aSJacob Faibussowitsch     CHKERRQ(ISDestroy(&ispetsc));
5051447629fSBarry Smith   }
5061447629fSBarry Smith   PetscFunctionReturn(0);
5071447629fSBarry Smith }
5081447629fSBarry Smith 
5091447629fSBarry Smith /*@C
5101447629fSBarry Smith    AOCreateMemoryScalableIS - Creates a memory scalable application ordering using two index sets.
5111447629fSBarry Smith 
5121447629fSBarry Smith    Collective on IS
5131447629fSBarry Smith 
5141447629fSBarry Smith    Input Parameters:
5151447629fSBarry Smith +  isapp - index set that defines an ordering
5161447629fSBarry Smith -  ispetsc - index set that defines another ordering (may be NULL to use the
5171447629fSBarry Smith              natural ordering)
5181447629fSBarry Smith 
5191447629fSBarry Smith    Output Parameter:
5201447629fSBarry Smith .  aoout - the new application ordering
5211447629fSBarry Smith 
5221447629fSBarry Smith    Level: beginner
5231447629fSBarry Smith 
52495452b02SPatrick Sanan     Notes:
52595452b02SPatrick Sanan     The index sets isapp and ispetsc must contain the all the integers 0 to napp-1 (where napp is the length of the index sets) with no duplicates;
5261447629fSBarry Smith            that is there cannot be any "holes".
5271447629fSBarry Smith            Comparing with AOCreateBasicIS(), this routine trades memory with message communication.
5281447629fSBarry Smith .seealso: AOCreateMemoryScalable(),  AODestroy()
5291447629fSBarry Smith @*/
5301447629fSBarry Smith PetscErrorCode  AOCreateMemoryScalableIS(IS isapp,IS ispetsc,AO *aoout)
5311447629fSBarry Smith {
5321447629fSBarry Smith   MPI_Comm       comm;
5331447629fSBarry Smith   AO             ao;
5341447629fSBarry Smith 
5351447629fSBarry Smith   PetscFunctionBegin;
5365f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectGetComm((PetscObject)isapp,&comm));
5375f80ce2aSJacob Faibussowitsch   CHKERRQ(AOCreate(comm,&ao));
5385f80ce2aSJacob Faibussowitsch   CHKERRQ(AOSetIS(ao,isapp,ispetsc));
5395f80ce2aSJacob Faibussowitsch   CHKERRQ(AOSetType(ao,AOMEMORYSCALABLE));
5405f80ce2aSJacob Faibussowitsch   CHKERRQ(AOViewFromOptions(ao,NULL,"-ao_view"));
5411447629fSBarry Smith   *aoout = ao;
5421447629fSBarry Smith   PetscFunctionReturn(0);
5431447629fSBarry Smith }
544