xref: /petsc/src/vec/is/ao/impls/memscalable/aomemscalable.c (revision 8cc058d9cd56c1ccb3be12a47760ddfc446aaffc)
11447629fSBarry Smith 
21447629fSBarry Smith /*
31447629fSBarry Smith     The memory scalable AO application ordering routines. These store the
41447629fSBarry Smith   local orderings on each processor.
51447629fSBarry Smith */
61447629fSBarry Smith 
71447629fSBarry Smith #include <../src/vec/is/ao/aoimpl.h>          /*I  "petscao.h"   I*/
81447629fSBarry Smith 
91447629fSBarry Smith typedef struct {
101447629fSBarry Smith   PetscInt    *app_loc;    /* app_loc[i] is the partner for the ith local PETSc slot */
111447629fSBarry Smith   PetscInt    *petsc_loc;  /* petsc_loc[j] is the partner for the jth local app slot */
121447629fSBarry Smith   PetscLayout map;         /* determines the local sizes of ao */
131447629fSBarry Smith } AO_MemoryScalable;
141447629fSBarry Smith 
151447629fSBarry Smith /*
161447629fSBarry Smith        All processors have the same data so processor 1 prints it
171447629fSBarry Smith */
181447629fSBarry Smith #undef __FUNCT__
191447629fSBarry Smith #define __FUNCT__ "AOView_MemoryScalable"
201447629fSBarry Smith PetscErrorCode AOView_MemoryScalable(AO ao,PetscViewer viewer)
211447629fSBarry Smith {
221447629fSBarry Smith   PetscErrorCode    ierr;
231447629fSBarry Smith   PetscMPIInt       rank,size;
241447629fSBarry Smith   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
251447629fSBarry Smith   PetscBool         iascii;
261447629fSBarry Smith   PetscMPIInt       tag_app,tag_petsc;
271447629fSBarry Smith   PetscLayout       map = aomems->map;
281447629fSBarry Smith   PetscInt          *app,*app_loc,*petsc,*petsc_loc,len,i,j;
291447629fSBarry Smith   MPI_Status        status;
301447629fSBarry Smith 
311447629fSBarry Smith   PetscFunctionBegin;
321447629fSBarry Smith   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
331447629fSBarry Smith   if (!iascii) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not supported for AO MemoryScalable",((PetscObject)viewer)->type_name);
341447629fSBarry Smith 
351447629fSBarry Smith   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)ao),&rank);CHKERRQ(ierr);
361447629fSBarry Smith   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)ao),&size);CHKERRQ(ierr);
371447629fSBarry Smith 
381447629fSBarry Smith   ierr = PetscObjectGetNewTag((PetscObject)ao,&tag_app);CHKERRQ(ierr);
391447629fSBarry Smith   ierr = PetscObjectGetNewTag((PetscObject)ao,&tag_petsc);CHKERRQ(ierr);
401447629fSBarry Smith 
411447629fSBarry Smith   if (!rank) {
421447629fSBarry Smith     ierr = PetscViewerASCIIPrintf(viewer,"Number of elements in ordering %D\n",ao->N);CHKERRQ(ierr);
431447629fSBarry Smith     ierr = PetscViewerASCIIPrintf(viewer,  "PETSc->App  App->PETSc\n");CHKERRQ(ierr);
441447629fSBarry Smith 
451447629fSBarry Smith     ierr = PetscMalloc2(map->N,PetscInt,&app,map->N,PetscInt,&petsc);CHKERRQ(ierr);
461447629fSBarry Smith     len  = map->n;
471447629fSBarry Smith     /* print local AO */
481447629fSBarry Smith     ierr = PetscViewerASCIIPrintf(viewer,"Process [%D]\n",rank);CHKERRQ(ierr);
491447629fSBarry Smith     for (i=0; i<len; i++) {
501447629fSBarry Smith       ierr = PetscViewerASCIIPrintf(viewer,"%3D  %3D    %3D  %3D\n",i,aomems->app_loc[i],i,aomems->petsc_loc[i]);CHKERRQ(ierr);
511447629fSBarry Smith     }
521447629fSBarry Smith 
531447629fSBarry Smith     /* recv and print off-processor's AO */
541447629fSBarry Smith     for (i=1; i<size; i++) {
551447629fSBarry Smith       len       = map->range[i+1] - map->range[i];
561447629fSBarry Smith       app_loc   = app  + map->range[i];
571447629fSBarry Smith       petsc_loc = petsc+ map->range[i];
581447629fSBarry Smith       ierr      = MPI_Recv(app_loc,(PetscMPIInt)len,MPIU_INT,i,tag_app,PetscObjectComm((PetscObject)ao),&status);CHKERRQ(ierr);
591447629fSBarry Smith       ierr      = MPI_Recv(petsc_loc,(PetscMPIInt)len,MPIU_INT,i,tag_petsc,PetscObjectComm((PetscObject)ao),&status);CHKERRQ(ierr);
601447629fSBarry Smith       ierr      = PetscViewerASCIIPrintf(viewer,"Process [%D]\n",i);CHKERRQ(ierr);
611447629fSBarry Smith       for (j=0; j<len; j++) {
621447629fSBarry Smith         ierr = PetscViewerASCIIPrintf(viewer,"%3D  %3D    %3D  %3D\n",map->range[i]+j,app_loc[j],map->range[i]+j,petsc_loc[j]);CHKERRQ(ierr);
631447629fSBarry Smith       }
641447629fSBarry Smith     }
651447629fSBarry Smith     ierr = PetscFree2(app,petsc);CHKERRQ(ierr);
661447629fSBarry Smith 
671447629fSBarry Smith   } else {
681447629fSBarry Smith     /* send values */
691447629fSBarry Smith     ierr = MPI_Send((void*)aomems->app_loc,map->n,MPIU_INT,0,tag_app,PetscObjectComm((PetscObject)ao));CHKERRQ(ierr);
701447629fSBarry Smith     ierr = MPI_Send((void*)aomems->petsc_loc,map->n,MPIU_INT,0,tag_petsc,PetscObjectComm((PetscObject)ao));CHKERRQ(ierr);
711447629fSBarry Smith   }
721447629fSBarry Smith   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
731447629fSBarry Smith   PetscFunctionReturn(0);
741447629fSBarry Smith }
751447629fSBarry Smith 
761447629fSBarry Smith #undef __FUNCT__
771447629fSBarry Smith #define __FUNCT__ "AODestroy_MemoryScalable"
781447629fSBarry Smith PetscErrorCode AODestroy_MemoryScalable(AO ao)
791447629fSBarry Smith {
801447629fSBarry Smith   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
811447629fSBarry Smith   PetscErrorCode    ierr;
821447629fSBarry Smith 
831447629fSBarry Smith   PetscFunctionBegin;
841447629fSBarry Smith   ierr = PetscFree2(aomems->app_loc,aomems->petsc_loc);CHKERRQ(ierr);
851447629fSBarry Smith   ierr = PetscLayoutDestroy(&aomems->map);CHKERRQ(ierr);
861447629fSBarry Smith   ierr = PetscFree(aomems);CHKERRQ(ierr);
871447629fSBarry Smith   PetscFunctionReturn(0);
881447629fSBarry Smith }
891447629fSBarry Smith 
901447629fSBarry Smith /*
911447629fSBarry Smith    Input Parameters:
921447629fSBarry Smith +   ao - the application ordering context
931447629fSBarry Smith .   n  - the number of integers in ia[]
941447629fSBarry Smith .   ia - the integers; these are replaced with their mapped value
951447629fSBarry Smith -   maploc - app_loc or petsc_loc in struct "AO_MemoryScalable"
961447629fSBarry Smith 
971447629fSBarry Smith    Output Parameter:
981447629fSBarry Smith .   ia - the mapped interges
991447629fSBarry Smith  */
1001447629fSBarry Smith #undef __FUNCT__
1011447629fSBarry Smith #define __FUNCT__ "AOMap_MemoryScalable_private"
1021447629fSBarry Smith PetscErrorCode AOMap_MemoryScalable_private(AO ao,PetscInt n,PetscInt *ia,PetscInt *maploc)
1031447629fSBarry Smith {
1041447629fSBarry Smith   PetscErrorCode    ierr;
1051447629fSBarry Smith   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
1061447629fSBarry Smith   MPI_Comm          comm;
1071447629fSBarry Smith   PetscMPIInt       rank,size,tag1,tag2;
1081447629fSBarry Smith   PetscInt          *owner,*start,*nprocs,nsends,nreceives;
1091447629fSBarry Smith   PetscInt          nmax,count,*sindices,*rindices,i,j,idx,lastidx,*sindices2,*rindices2;
1101447629fSBarry Smith   PetscInt          *owners = aomems->map->range;
1111447629fSBarry Smith   MPI_Request       *send_waits,*recv_waits,*send_waits2,*recv_waits2;
1121447629fSBarry Smith   MPI_Status        recv_status;
1131447629fSBarry Smith   PetscMPIInt       nindices,source,widx;
1141447629fSBarry Smith   PetscInt          *rbuf,*sbuf;
1151447629fSBarry Smith   MPI_Status        *send_status,*send_status2;
1161447629fSBarry Smith 
1171447629fSBarry Smith   PetscFunctionBegin;
1181447629fSBarry Smith   ierr = PetscObjectGetComm((PetscObject)ao,&comm);CHKERRQ(ierr);
1191447629fSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1201447629fSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1211447629fSBarry Smith 
1221447629fSBarry Smith   /*  first count number of contributors to each processor */
1231447629fSBarry Smith   ierr = PetscMalloc2(2*size,PetscInt,&nprocs,size,PetscInt,&start);CHKERRQ(ierr);
1241447629fSBarry Smith   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1251447629fSBarry Smith   ierr = PetscMalloc(n*sizeof(PetscInt),&owner);CHKERRQ(ierr);
1261447629fSBarry Smith   ierr = PetscMemzero(owner,n*sizeof(PetscInt));CHKERRQ(ierr);
1271447629fSBarry Smith 
1281447629fSBarry Smith   j       = 0;
1291447629fSBarry Smith   lastidx = -1;
1301447629fSBarry Smith   for (i=0; i<n; i++) {
1311447629fSBarry Smith     /* if indices are NOT locally sorted, need to start search at the beginning */
1321447629fSBarry Smith     if (lastidx > (idx = ia[i])) j = 0;
1331447629fSBarry Smith     lastidx = idx;
1341447629fSBarry Smith     for (; j<size; j++) {
1351447629fSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
1361447629fSBarry Smith         nprocs[2*j]++;     /* num of indices to be sent */
1371447629fSBarry Smith         nprocs[2*j+1] = 1; /* send to proc[j] */
1381447629fSBarry Smith         owner[i]      = j;
1391447629fSBarry Smith         break;
1401447629fSBarry Smith       }
1411447629fSBarry Smith     }
1421447629fSBarry Smith   }
1431447629fSBarry Smith   nprocs[2*rank]=nprocs[2*rank+1]=0; /* do not receive from self! */
1441447629fSBarry Smith   nsends        = 0;
1451447629fSBarry Smith   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
1461447629fSBarry Smith 
1471447629fSBarry Smith   /* inform other processors of number of messages and max length*/
1481447629fSBarry Smith   ierr = PetscMaxSum(comm,nprocs,&nmax,&nreceives);CHKERRQ(ierr);
1491447629fSBarry Smith 
1501447629fSBarry Smith   /* allocate arrays */
1511447629fSBarry Smith   ierr = PetscObjectGetNewTag((PetscObject)ao,&tag1);CHKERRQ(ierr);
1521447629fSBarry Smith   ierr = PetscObjectGetNewTag((PetscObject)ao,&tag2);CHKERRQ(ierr);
1531447629fSBarry Smith 
1541447629fSBarry Smith   ierr = PetscMalloc2(nreceives*nmax,PetscInt,&rindices,nreceives,MPI_Request,&recv_waits);CHKERRQ(ierr);
1551447629fSBarry Smith   ierr = PetscMalloc2(nsends*nmax,PetscInt,&rindices2,nsends,MPI_Request,&recv_waits2);CHKERRQ(ierr);
1561447629fSBarry Smith 
1571447629fSBarry Smith   ierr = PetscMalloc3(n,PetscInt,&sindices,nsends,MPI_Request,&send_waits,nsends,MPI_Status,&send_status);CHKERRQ(ierr);
1581447629fSBarry Smith   ierr = PetscMalloc3(n,PetscInt,&sindices2,nreceives,MPI_Request,&send_waits2,nreceives,MPI_Status,&send_status2);CHKERRQ(ierr);
1591447629fSBarry Smith 
1601447629fSBarry Smith   /* post 1st receives: receive others requests
1611447629fSBarry Smith      since we don't know how long each individual message is we
1621447629fSBarry Smith      allocate the largest needed buffer for each receive. Potentially
1631447629fSBarry Smith      this is a lot of wasted space.
1641447629fSBarry Smith   */
1651447629fSBarry Smith   for (i=0,count=0; i<nreceives; i++) {
1661447629fSBarry Smith     ierr = MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);CHKERRQ(ierr);
1671447629fSBarry Smith   }
1681447629fSBarry Smith 
1691447629fSBarry Smith   /* do 1st sends:
1701447629fSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
1711447629fSBarry Smith          the ith processor
1721447629fSBarry Smith   */
1731447629fSBarry Smith   start[0] = 0;
1741447629fSBarry Smith   for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2];
1751447629fSBarry Smith   for (i=0; i<n; i++) {
1761447629fSBarry Smith     j = owner[i];
1771447629fSBarry Smith     if (j != rank) {
1781447629fSBarry Smith       sindices[start[j]++]  = ia[i];
1791447629fSBarry Smith     } else { /* compute my own map */
1801447629fSBarry Smith       if (ia[i] >= owners[rank] && ia[i] < owners[rank+1]) {
1811447629fSBarry Smith         ia[i] = maploc[ia[i]-owners[rank]];
1821447629fSBarry Smith       } else {
1831447629fSBarry Smith         ia[i] = -1;  /* ia[i] is not in the range of 0 and N-1, maps it to -1 */
1841447629fSBarry Smith       }
1851447629fSBarry Smith     }
1861447629fSBarry Smith   }
1871447629fSBarry Smith 
1881447629fSBarry Smith   start[0] = 0;
1891447629fSBarry Smith   for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2];
1901447629fSBarry Smith   for (i=0,count=0; i<size; i++) {
1911447629fSBarry Smith     if (nprocs[2*i+1]) {
1921447629fSBarry Smith       /* send my request to others */
1931447629fSBarry Smith       ierr = MPI_Isend(sindices+start[i],nprocs[2*i],MPIU_INT,i,tag1,comm,send_waits+count);CHKERRQ(ierr);
1941447629fSBarry Smith       /* post receive for the answer of my request */
1951447629fSBarry Smith       ierr = MPI_Irecv(sindices2+start[i],nprocs[2*i],MPIU_INT,i,tag2,comm,recv_waits2+count);CHKERRQ(ierr);
1961447629fSBarry Smith       count++;
1971447629fSBarry Smith     }
1981447629fSBarry Smith   }
1991447629fSBarry Smith   if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %d != count %d",nsends,count);
2001447629fSBarry Smith 
2011447629fSBarry Smith   /* wait on 1st sends */
2021447629fSBarry Smith   if (nsends) {
2031447629fSBarry Smith     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
2041447629fSBarry Smith   }
2051447629fSBarry Smith 
2061447629fSBarry Smith   /* 1st recvs: other's requests */
2071447629fSBarry Smith   for (j=0; j< nreceives; j++) {
2081447629fSBarry Smith     ierr   = MPI_Waitany(nreceives,recv_waits,&widx,&recv_status);CHKERRQ(ierr); /* idx: index of handle for operation that completed */
2091447629fSBarry Smith     ierr   = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRQ(ierr);
2101447629fSBarry Smith     rbuf   = rindices+nmax*widx; /* global index */
2111447629fSBarry Smith     source = recv_status.MPI_SOURCE;
2121447629fSBarry Smith 
2131447629fSBarry Smith     /* compute mapping */
2141447629fSBarry Smith     sbuf = rbuf;
2151447629fSBarry Smith     for (i=0; i<nindices; i++) sbuf[i] = maploc[rbuf[i]-owners[rank]];
2161447629fSBarry Smith 
2171447629fSBarry Smith     /* send mapping back to the sender */
2181447629fSBarry Smith     ierr = MPI_Isend(sbuf,nindices,MPIU_INT,source,tag2,comm,send_waits2+widx);CHKERRQ(ierr);
2191447629fSBarry Smith   }
2201447629fSBarry Smith 
2211447629fSBarry Smith   /* wait on 2nd sends */
2221447629fSBarry Smith   if (nreceives) {
2231447629fSBarry Smith     ierr = MPI_Waitall(nreceives,send_waits2,send_status2);CHKERRQ(ierr);
2241447629fSBarry Smith   }
2251447629fSBarry Smith 
2261447629fSBarry Smith   /* 2nd recvs: for the answer of my request */
2271447629fSBarry Smith   for (j=0; j< nsends; j++) {
2281447629fSBarry Smith     ierr   = MPI_Waitany(nsends,recv_waits2,&widx,&recv_status);CHKERRQ(ierr);
2291447629fSBarry Smith     ierr   = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRQ(ierr);
2301447629fSBarry Smith     source = recv_status.MPI_SOURCE;
2311447629fSBarry Smith     /* pack output ia[] */
2321447629fSBarry Smith     rbuf  = sindices2+start[source];
2331447629fSBarry Smith     count = 0;
2341447629fSBarry Smith     for (i=0; i<n; i++) {
2351447629fSBarry Smith       if (source == owner[i]) ia[i] = rbuf[count++];
2361447629fSBarry Smith     }
2371447629fSBarry Smith   }
2381447629fSBarry Smith 
2391447629fSBarry Smith   /* free arrays */
2401447629fSBarry Smith   ierr = PetscFree2(nprocs,start);CHKERRQ(ierr);
2411447629fSBarry Smith   ierr = PetscFree(owner);CHKERRQ(ierr);
2421447629fSBarry Smith   ierr = PetscFree2(rindices,recv_waits);CHKERRQ(ierr);
2431447629fSBarry Smith   ierr = PetscFree2(rindices2,recv_waits2);CHKERRQ(ierr);
2441447629fSBarry Smith   ierr = PetscFree3(sindices,send_waits,send_status);CHKERRQ(ierr);
2451447629fSBarry Smith   ierr = PetscFree3(sindices2,send_waits2,send_status2);CHKERRQ(ierr);
2461447629fSBarry Smith   PetscFunctionReturn(0);
2471447629fSBarry Smith }
2481447629fSBarry Smith 
2491447629fSBarry Smith #undef __FUNCT__
2501447629fSBarry Smith #define __FUNCT__ "AOPetscToApplication_MemoryScalable"
2511447629fSBarry Smith PetscErrorCode AOPetscToApplication_MemoryScalable(AO ao,PetscInt n,PetscInt *ia)
2521447629fSBarry Smith {
2531447629fSBarry Smith   PetscErrorCode    ierr;
2541447629fSBarry Smith   AO_MemoryScalable *aomems  = (AO_MemoryScalable*)ao->data;
2551447629fSBarry Smith   PetscInt          *app_loc = aomems->app_loc;
2561447629fSBarry Smith 
2571447629fSBarry Smith   PetscFunctionBegin;
2581447629fSBarry Smith   ierr = AOMap_MemoryScalable_private(ao,n,ia,app_loc);CHKERRQ(ierr);
2591447629fSBarry Smith   PetscFunctionReturn(0);
2601447629fSBarry Smith }
2611447629fSBarry Smith 
2621447629fSBarry Smith #undef __FUNCT__
2631447629fSBarry Smith #define __FUNCT__ "AOApplicationToPetsc_MemoryScalable"
2641447629fSBarry Smith PetscErrorCode AOApplicationToPetsc_MemoryScalable(AO ao,PetscInt n,PetscInt *ia)
2651447629fSBarry Smith {
2661447629fSBarry Smith   PetscErrorCode    ierr;
2671447629fSBarry Smith   AO_MemoryScalable *aomems    = (AO_MemoryScalable*)ao->data;
2681447629fSBarry Smith   PetscInt          *petsc_loc = aomems->petsc_loc;
2691447629fSBarry Smith 
2701447629fSBarry Smith   PetscFunctionBegin;
2711447629fSBarry Smith   ierr = AOMap_MemoryScalable_private(ao,n,ia,petsc_loc);CHKERRQ(ierr);
2721447629fSBarry Smith   PetscFunctionReturn(0);
2731447629fSBarry Smith }
2741447629fSBarry Smith 
2751447629fSBarry Smith static struct _AOOps AOOps_MemoryScalable = {
2761447629fSBarry Smith   AOView_MemoryScalable,
2771447629fSBarry Smith   AODestroy_MemoryScalable,
2781447629fSBarry Smith   AOPetscToApplication_MemoryScalable,
2791447629fSBarry Smith   AOApplicationToPetsc_MemoryScalable,
2801447629fSBarry Smith   0,
2811447629fSBarry Smith   0,
2821447629fSBarry Smith   0,
2831447629fSBarry Smith   0
2841447629fSBarry Smith };
2851447629fSBarry Smith 
2861447629fSBarry Smith #undef __FUNCT__
2871447629fSBarry Smith #define __FUNCT__ "AOCreateMemoryScalable_private"
2881447629fSBarry Smith PetscErrorCode  AOCreateMemoryScalable_private(MPI_Comm comm,PetscInt napp,const PetscInt from_array[],const PetscInt to_array[],AO ao, PetscInt *aomap_loc)
2891447629fSBarry Smith {
2901447629fSBarry Smith   PetscErrorCode    ierr;
2911447629fSBarry Smith   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
2921447629fSBarry Smith   PetscLayout       map     = aomems->map;
2931447629fSBarry Smith   PetscInt          n_local = map->n,i,j;
2941447629fSBarry Smith   PetscMPIInt       rank,size,tag;
2951447629fSBarry Smith   PetscInt          *owner,*start,*nprocs,nsends,nreceives;
2961447629fSBarry Smith   PetscInt          nmax,count,*sindices,*rindices,idx,lastidx;
2971447629fSBarry Smith   PetscInt          *owners = aomems->map->range;
2981447629fSBarry Smith   MPI_Request       *send_waits,*recv_waits;
2991447629fSBarry Smith   MPI_Status        recv_status;
3001447629fSBarry Smith   PetscMPIInt       nindices,widx;
3011447629fSBarry Smith   PetscInt          *rbuf;
3021447629fSBarry Smith   PetscInt          n=napp,ip,ia;
3031447629fSBarry Smith   MPI_Status        *send_status;
3041447629fSBarry Smith 
3051447629fSBarry Smith   PetscFunctionBegin;
3061447629fSBarry Smith   ierr = PetscMemzero(aomap_loc,n_local*sizeof(PetscInt));CHKERRQ(ierr);
3071447629fSBarry Smith 
3081447629fSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3091447629fSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3101447629fSBarry Smith 
3111447629fSBarry Smith   /*  first count number of contributors (of from_array[]) to each processor */
3121447629fSBarry Smith   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
3131447629fSBarry Smith   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
3141447629fSBarry Smith   ierr = PetscMalloc(n*sizeof(PetscInt),&owner);CHKERRQ(ierr);
3151447629fSBarry Smith 
3161447629fSBarry Smith   j       = 0;
3171447629fSBarry Smith   lastidx = -1;
3181447629fSBarry Smith   for (i=0; i<n; i++) {
3191447629fSBarry Smith     /* if indices are NOT locally sorted, need to start search at the beginning */
3201447629fSBarry Smith     if (lastidx > (idx = from_array[i])) j = 0;
3211447629fSBarry Smith     lastidx = idx;
3221447629fSBarry Smith     for (; j<size; j++) {
3231447629fSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
3241447629fSBarry Smith         nprocs[2*j]  += 2; /* num of indices to be sent - in pairs (ip,ia) */
3251447629fSBarry Smith         nprocs[2*j+1] = 1; /* send to proc[j] */
3261447629fSBarry Smith         owner[i]      = j;
3271447629fSBarry Smith         break;
3281447629fSBarry Smith       }
3291447629fSBarry Smith     }
3301447629fSBarry Smith   }
3311447629fSBarry Smith   nprocs[2*rank]=nprocs[2*rank+1]=0; /* do not receive from self! */
3321447629fSBarry Smith   nsends        = 0;
3331447629fSBarry Smith   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
3341447629fSBarry Smith 
3351447629fSBarry Smith   /* inform other processors of number of messages and max length*/
3361447629fSBarry Smith   ierr = PetscMaxSum(comm,nprocs,&nmax,&nreceives);CHKERRQ(ierr);
3371447629fSBarry Smith 
3381447629fSBarry Smith   /* allocate arrays */
3391447629fSBarry Smith   ierr = PetscObjectGetNewTag((PetscObject)ao,&tag);CHKERRQ(ierr);
3401447629fSBarry Smith   ierr = PetscMalloc2(nreceives*nmax,PetscInt,&rindices,nreceives,MPI_Request,&recv_waits);CHKERRQ(ierr);
3411447629fSBarry Smith   ierr = PetscMalloc3(2*n,PetscInt,&sindices,nsends,MPI_Request,&send_waits,nsends,MPI_Status,&send_status);CHKERRQ(ierr);
3421447629fSBarry Smith   ierr = PetscMalloc(size*sizeof(PetscInt),&start);CHKERRQ(ierr);
3431447629fSBarry Smith 
3441447629fSBarry Smith   /* post receives: */
3451447629fSBarry Smith   for (i=0; i<nreceives; i++) {
3461447629fSBarry Smith     ierr = MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
3471447629fSBarry Smith   }
3481447629fSBarry Smith 
3491447629fSBarry Smith   /* do sends:
3501447629fSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3511447629fSBarry Smith          the ith processor
3521447629fSBarry Smith   */
3531447629fSBarry Smith   start[0] = 0;
3541447629fSBarry Smith   for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2];
3551447629fSBarry Smith   for (i=0; i<n; i++) {
3561447629fSBarry Smith     j = owner[i];
3571447629fSBarry Smith     if (j != rank) {
3581447629fSBarry Smith       ip                   = from_array[i];
3591447629fSBarry Smith       ia                   = to_array[i];
3601447629fSBarry Smith       sindices[start[j]++] = ip;
3611447629fSBarry Smith       sindices[start[j]++] = ia;
3621447629fSBarry Smith     } else { /* compute my own map */
3631447629fSBarry Smith       ip            = from_array[i] - owners[rank];
3641447629fSBarry Smith       ia            = to_array[i];
3651447629fSBarry Smith       aomap_loc[ip] = ia;
3661447629fSBarry Smith     }
3671447629fSBarry Smith   }
3681447629fSBarry Smith 
3691447629fSBarry Smith   start[0] = 0;
3701447629fSBarry Smith   for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2];
3711447629fSBarry Smith   for (i=0,count=0; i<size; i++) {
3721447629fSBarry Smith     if (nprocs[2*i+1]) {
3731447629fSBarry Smith       ierr = MPI_Isend(sindices+start[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count);CHKERRQ(ierr);
3741447629fSBarry Smith       count++;
3751447629fSBarry Smith     }
3761447629fSBarry Smith   }
3771447629fSBarry Smith   if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %d != count %d",nsends,count);
3781447629fSBarry Smith 
3791447629fSBarry Smith   /* wait on sends */
3801447629fSBarry Smith   if (nsends) {
3811447629fSBarry Smith     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
3821447629fSBarry Smith   }
3831447629fSBarry Smith 
3841447629fSBarry Smith   /* recvs */
3851447629fSBarry Smith   count=0;
3861447629fSBarry Smith   for (j= nreceives; j>0; j--) {
3871447629fSBarry Smith     ierr = MPI_Waitany(nreceives,recv_waits,&widx,&recv_status);CHKERRQ(ierr);
3881447629fSBarry Smith     ierr = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRQ(ierr);
3891447629fSBarry Smith     rbuf = rindices+nmax*widx; /* global index */
3901447629fSBarry Smith 
3911447629fSBarry Smith     /* compute local mapping */
3921447629fSBarry Smith     for (i=0; i<nindices; i+=2) { /* pack aomap_loc */
3931447629fSBarry Smith       ip            = rbuf[i] - owners[rank]; /* local index */
3941447629fSBarry Smith       ia            = rbuf[i+1];
3951447629fSBarry Smith       aomap_loc[ip] = ia;
3961447629fSBarry Smith     }
3971447629fSBarry Smith     count++;
3981447629fSBarry Smith   }
3991447629fSBarry Smith 
4001447629fSBarry Smith   ierr = PetscFree(start);CHKERRQ(ierr);
4011447629fSBarry Smith   ierr = PetscFree3(sindices,send_waits,send_status);CHKERRQ(ierr);
4021447629fSBarry Smith   ierr = PetscFree2(rindices,recv_waits);CHKERRQ(ierr);
4031447629fSBarry Smith   ierr = PetscFree(owner);CHKERRQ(ierr);
4041447629fSBarry Smith   ierr = PetscFree(nprocs);CHKERRQ(ierr);
4051447629fSBarry Smith   PetscFunctionReturn(0);
4061447629fSBarry Smith }
4071447629fSBarry Smith 
4081447629fSBarry Smith #undef __FUNCT__
4091447629fSBarry Smith #define __FUNCT__ "AOCreate_MemoryScalable"
410*8cc058d9SJed Brown PETSC_EXTERN PetscErrorCode AOCreate_MemoryScalable(AO ao)
4111447629fSBarry Smith {
4121447629fSBarry Smith   PetscErrorCode    ierr;
4131447629fSBarry Smith   IS                isapp=ao->isapp,ispetsc=ao->ispetsc;
4141447629fSBarry Smith   const PetscInt    *mypetsc,*myapp;
4151447629fSBarry Smith   PetscInt          napp,n_local,N,i,start,*petsc,*lens,*disp;
4161447629fSBarry Smith   MPI_Comm          comm;
4171447629fSBarry Smith   AO_MemoryScalable *aomems;
4181447629fSBarry Smith   PetscLayout       map;
4191447629fSBarry Smith   PetscMPIInt       size,rank;
4201447629fSBarry Smith 
4211447629fSBarry Smith   PetscFunctionBegin;
4221447629fSBarry Smith   /* create special struct aomems */
4231447629fSBarry Smith   ierr     = PetscNewLog(ao, AO_MemoryScalable, &aomems);CHKERRQ(ierr);
4241447629fSBarry Smith   ao->data = (void*) aomems;
4251447629fSBarry Smith   ierr     = PetscMemcpy(ao->ops,&AOOps_MemoryScalable,sizeof(struct _AOOps));CHKERRQ(ierr);
4261447629fSBarry Smith   ierr     = PetscObjectChangeTypeName((PetscObject)ao,AOMEMORYSCALABLE);CHKERRQ(ierr);
4271447629fSBarry Smith 
4281447629fSBarry Smith   /* transmit all local lengths of isapp to all processors */
4291447629fSBarry Smith   ierr = PetscObjectGetComm((PetscObject)isapp,&comm);CHKERRQ(ierr);
4301447629fSBarry Smith   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
4311447629fSBarry Smith   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
4321447629fSBarry Smith   ierr = PetscMalloc2(size,PetscInt,&lens,size,PetscInt,&disp);CHKERRQ(ierr);
4331447629fSBarry Smith   ierr = ISGetLocalSize(isapp,&napp);CHKERRQ(ierr);
4341447629fSBarry Smith   ierr = MPI_Allgather(&napp, 1, MPIU_INT, lens, 1, MPIU_INT, comm);CHKERRQ(ierr);
4351447629fSBarry Smith 
4361447629fSBarry Smith   N = 0;
4371447629fSBarry Smith   for (i = 0; i < size; i++) {
4381447629fSBarry Smith     disp[i] = N;
4391447629fSBarry Smith     N      += lens[i];
4401447629fSBarry Smith   }
4411447629fSBarry Smith 
4421447629fSBarry Smith   /* If ispetsc is 0 then use "natural" numbering */
4431447629fSBarry Smith   if (napp) {
4441447629fSBarry Smith     if (!ispetsc) {
4451447629fSBarry Smith       start = disp[rank];
4461447629fSBarry Smith       ierr  = PetscMalloc((napp+1) * sizeof(PetscInt), &petsc);CHKERRQ(ierr);
4471447629fSBarry Smith       for (i=0; i<napp; i++) petsc[i] = start + i;
4481447629fSBarry Smith     } else {
4491447629fSBarry Smith       ierr  = ISGetIndices(ispetsc,&mypetsc);CHKERRQ(ierr);
4501447629fSBarry Smith       petsc = (PetscInt*)mypetsc;
4511447629fSBarry Smith     }
4521447629fSBarry Smith   }
4531447629fSBarry Smith 
4541447629fSBarry Smith   /* create a map with global size N - used to determine the local sizes of ao - shall we use local napp instead of N? */
4551447629fSBarry Smith   ierr    = PetscLayoutCreate(comm,&map);CHKERRQ(ierr);
4561447629fSBarry Smith   map->bs = 1;
4571447629fSBarry Smith   map->N  = N;
4581447629fSBarry Smith   ierr    = PetscLayoutSetUp(map);CHKERRQ(ierr);
4591447629fSBarry Smith 
4601447629fSBarry Smith   ao->N       = N;
4611447629fSBarry Smith   ao->n       = map->n;
4621447629fSBarry Smith   aomems->map = map;
4631447629fSBarry Smith 
4641447629fSBarry Smith   /* create distributed indices app_loc: petsc->app and petsc_loc: app->petsc */
4651447629fSBarry Smith   n_local = map->n;
4661447629fSBarry Smith   ierr    = PetscMalloc2(n_local,PetscInt, &aomems->app_loc,n_local,PetscInt,&aomems->petsc_loc);CHKERRQ(ierr);
4671447629fSBarry Smith   ierr    = PetscLogObjectMemory(ao,2*n_local*sizeof(PetscInt));CHKERRQ(ierr);
4681447629fSBarry Smith   ierr    = PetscMemzero(aomems->app_loc,n_local*sizeof(PetscInt));CHKERRQ(ierr);
4691447629fSBarry Smith   ierr    = PetscMemzero(aomems->petsc_loc,n_local*sizeof(PetscInt));CHKERRQ(ierr);
4701447629fSBarry Smith   ierr    = ISGetIndices(isapp,&myapp);CHKERRQ(ierr);
4711447629fSBarry Smith 
4721447629fSBarry Smith   ierr = AOCreateMemoryScalable_private(comm,napp,petsc,myapp,ao,aomems->app_loc);CHKERRQ(ierr);
4731447629fSBarry Smith   ierr = AOCreateMemoryScalable_private(comm,napp,myapp,petsc,ao,aomems->petsc_loc);CHKERRQ(ierr);
4741447629fSBarry Smith 
4751447629fSBarry Smith   ierr = ISRestoreIndices(isapp,&myapp);CHKERRQ(ierr);
4761447629fSBarry Smith   if (napp) {
4771447629fSBarry Smith     if (ispetsc) {
4781447629fSBarry Smith       ierr = ISRestoreIndices(ispetsc,&mypetsc);CHKERRQ(ierr);
4791447629fSBarry Smith     } else {
4801447629fSBarry Smith       ierr = PetscFree(petsc);CHKERRQ(ierr);
4811447629fSBarry Smith     }
4821447629fSBarry Smith   }
4831447629fSBarry Smith   ierr = PetscFree2(lens,disp);CHKERRQ(ierr);
4841447629fSBarry Smith   PetscFunctionReturn(0);
4851447629fSBarry Smith }
4861447629fSBarry Smith 
4871447629fSBarry Smith #undef __FUNCT__
4881447629fSBarry Smith #define __FUNCT__ "AOCreateMemoryScalable"
4891447629fSBarry Smith /*@C
4901447629fSBarry Smith    AOCreateMemoryScalable - Creates a memory scalable application ordering using two integer arrays.
4911447629fSBarry Smith 
4921447629fSBarry Smith    Collective on MPI_Comm
4931447629fSBarry Smith 
4941447629fSBarry Smith    Input Parameters:
4951447629fSBarry Smith +  comm - MPI communicator that is to share AO
4961447629fSBarry Smith .  napp - size of integer arrays
4971447629fSBarry Smith .  myapp - integer array that defines an ordering
4981447629fSBarry Smith -  mypetsc - integer array that defines another ordering (may be NULL to
4991447629fSBarry Smith              indicate the natural ordering, that is 0,1,2,3,...)
5001447629fSBarry Smith 
5011447629fSBarry Smith    Output Parameter:
5021447629fSBarry Smith .  aoout - the new application ordering
5031447629fSBarry Smith 
5041447629fSBarry Smith    Level: beginner
5051447629fSBarry Smith 
5061447629fSBarry Smith     Notes: The arrays myapp and mypetsc must contain the all the integers 0 to napp-1 with no duplicates; that is there cannot be any "holes"
5071447629fSBarry Smith            in the indices. Use AOCreateMapping() or AOCreateMappingIS() if you wish to have "holes" in the indices.
5081447629fSBarry Smith            Comparing with AOCreateBasic(), this routine trades memory with message communication.
5091447629fSBarry Smith 
5101447629fSBarry Smith .keywords: AO, create
5111447629fSBarry Smith 
5121447629fSBarry Smith .seealso: AOCreateMemoryScalableIS(), AODestroy(), AOPetscToApplication(), AOApplicationToPetsc()
5131447629fSBarry Smith @*/
5141447629fSBarry Smith PetscErrorCode AOCreateMemoryScalable(MPI_Comm comm,PetscInt napp,const PetscInt myapp[],const PetscInt mypetsc[],AO *aoout)
5151447629fSBarry Smith {
5161447629fSBarry Smith   PetscErrorCode ierr;
5171447629fSBarry Smith   IS             isapp,ispetsc;
5181447629fSBarry Smith   const PetscInt *app=myapp,*petsc=mypetsc;
5191447629fSBarry Smith 
5201447629fSBarry Smith   PetscFunctionBegin;
5211447629fSBarry Smith   ierr = ISCreateGeneral(comm,napp,app,PETSC_USE_POINTER,&isapp);CHKERRQ(ierr);
5221447629fSBarry Smith   if (mypetsc) {
5231447629fSBarry Smith     ierr = ISCreateGeneral(comm,napp,petsc,PETSC_USE_POINTER,&ispetsc);CHKERRQ(ierr);
5241447629fSBarry Smith   } else {
5251447629fSBarry Smith     ispetsc = NULL;
5261447629fSBarry Smith   }
5271447629fSBarry Smith   ierr = AOCreateMemoryScalableIS(isapp,ispetsc,aoout);CHKERRQ(ierr);
5281447629fSBarry Smith   ierr = ISDestroy(&isapp);CHKERRQ(ierr);
5291447629fSBarry Smith   if (mypetsc) {
5301447629fSBarry Smith     ierr = ISDestroy(&ispetsc);CHKERRQ(ierr);
5311447629fSBarry Smith   }
5321447629fSBarry Smith   PetscFunctionReturn(0);
5331447629fSBarry Smith }
5341447629fSBarry Smith 
5351447629fSBarry Smith #undef __FUNCT__
5361447629fSBarry Smith #define __FUNCT__ "AOCreateMemoryScalableIS"
5371447629fSBarry Smith /*@C
5381447629fSBarry Smith    AOCreateMemoryScalableIS - Creates a memory scalable application ordering using two index sets.
5391447629fSBarry Smith 
5401447629fSBarry Smith    Collective on IS
5411447629fSBarry Smith 
5421447629fSBarry Smith    Input Parameters:
5431447629fSBarry Smith +  isapp - index set that defines an ordering
5441447629fSBarry Smith -  ispetsc - index set that defines another ordering (may be NULL to use the
5451447629fSBarry Smith              natural ordering)
5461447629fSBarry Smith 
5471447629fSBarry Smith    Output Parameter:
5481447629fSBarry Smith .  aoout - the new application ordering
5491447629fSBarry Smith 
5501447629fSBarry Smith    Level: beginner
5511447629fSBarry Smith 
5521447629fSBarry Smith     Notes: The index sets isapp and ispetsc must contain the all the integers 0 to napp-1 (where napp is the length of the index sets) with no duplicates;
5531447629fSBarry Smith            that is there cannot be any "holes".
5541447629fSBarry Smith            Comparing with AOCreateBasicIS(), this routine trades memory with message communication.
5551447629fSBarry Smith .keywords: AO, create
5561447629fSBarry Smith 
5571447629fSBarry Smith .seealso: AOCreateMemoryScalable(),  AODestroy()
5581447629fSBarry Smith @*/
5591447629fSBarry Smith PetscErrorCode  AOCreateMemoryScalableIS(IS isapp,IS ispetsc,AO *aoout)
5601447629fSBarry Smith {
5611447629fSBarry Smith   PetscErrorCode ierr;
5621447629fSBarry Smith   MPI_Comm       comm;
5631447629fSBarry Smith   AO             ao;
5641447629fSBarry Smith 
5651447629fSBarry Smith   PetscFunctionBegin;
5661447629fSBarry Smith   ierr   = PetscObjectGetComm((PetscObject)isapp,&comm);CHKERRQ(ierr);
5671447629fSBarry Smith   ierr   = AOCreate(comm,&ao);CHKERRQ(ierr);
5681447629fSBarry Smith   ierr   = AOSetIS(ao,isapp,ispetsc);CHKERRQ(ierr);
5691447629fSBarry Smith   ierr   = AOSetType(ao,AOMEMORYSCALABLE);CHKERRQ(ierr);
5701447629fSBarry Smith   *aoout = ao;
5711447629fSBarry Smith   PetscFunctionReturn(0);
5721447629fSBarry Smith }
573