xref: /petsc/src/vec/is/ao/impls/memscalable/aomemscalable.c (revision d083f849a86f1f43e18d534ee43954e2786cb29a)
11447629fSBarry Smith 
21447629fSBarry Smith /*
31447629fSBarry Smith     The memory scalable AO application ordering routines. These store the
46bd6ae52SBarry Smith   orderings on each processor for that processor's range of values
51447629fSBarry Smith */
61447629fSBarry Smith 
71447629fSBarry Smith #include <../src/vec/is/ao/aoimpl.h>          /*I  "petscao.h"   I*/
81447629fSBarry Smith 
91447629fSBarry Smith typedef struct {
101447629fSBarry Smith   PetscInt    *app_loc;    /* app_loc[i] is the partner for the ith local PETSc slot */
111447629fSBarry Smith   PetscInt    *petsc_loc;  /* petsc_loc[j] is the partner for the jth local app slot */
121447629fSBarry Smith   PetscLayout map;         /* determines the local sizes of ao */
131447629fSBarry Smith } AO_MemoryScalable;
141447629fSBarry Smith 
151447629fSBarry Smith /*
166bd6ae52SBarry Smith        All processors ship the data to process 0 to be printed; note that this is not scalable because
176bd6ae52SBarry Smith        process 0 allocates space for all the orderings entry across all the processes
181447629fSBarry Smith */
191447629fSBarry Smith PetscErrorCode AOView_MemoryScalable(AO ao,PetscViewer viewer)
201447629fSBarry Smith {
211447629fSBarry Smith   PetscErrorCode    ierr;
221447629fSBarry Smith   PetscMPIInt       rank,size;
231447629fSBarry Smith   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
241447629fSBarry Smith   PetscBool         iascii;
251447629fSBarry Smith   PetscMPIInt       tag_app,tag_petsc;
261447629fSBarry Smith   PetscLayout       map = aomems->map;
271447629fSBarry Smith   PetscInt          *app,*app_loc,*petsc,*petsc_loc,len,i,j;
281447629fSBarry Smith   MPI_Status        status;
291447629fSBarry Smith 
301447629fSBarry Smith   PetscFunctionBegin;
311447629fSBarry Smith   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
321447629fSBarry Smith   if (!iascii) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not supported for AO MemoryScalable",((PetscObject)viewer)->type_name);
331447629fSBarry Smith 
341447629fSBarry Smith   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)ao),&rank);CHKERRQ(ierr);
351447629fSBarry Smith   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)ao),&size);CHKERRQ(ierr);
361447629fSBarry Smith 
371447629fSBarry Smith   ierr = PetscObjectGetNewTag((PetscObject)ao,&tag_app);CHKERRQ(ierr);
381447629fSBarry Smith   ierr = PetscObjectGetNewTag((PetscObject)ao,&tag_petsc);CHKERRQ(ierr);
391447629fSBarry Smith 
401447629fSBarry Smith   if (!rank) {
411447629fSBarry Smith     ierr = PetscViewerASCIIPrintf(viewer,"Number of elements in ordering %D\n",ao->N);CHKERRQ(ierr);
421447629fSBarry Smith     ierr = PetscViewerASCIIPrintf(viewer,  "PETSc->App  App->PETSc\n");CHKERRQ(ierr);
431447629fSBarry Smith 
44dcca6d9dSJed Brown     ierr = PetscMalloc2(map->N,&app,map->N,&petsc);CHKERRQ(ierr);
451447629fSBarry Smith     len  = map->n;
461447629fSBarry Smith     /* print local AO */
476bd6ae52SBarry Smith     ierr = PetscViewerASCIIPrintf(viewer,"Process [%d]\n",rank);CHKERRQ(ierr);
481447629fSBarry Smith     for (i=0; i<len; i++) {
491447629fSBarry Smith       ierr = PetscViewerASCIIPrintf(viewer,"%3D  %3D    %3D  %3D\n",i,aomems->app_loc[i],i,aomems->petsc_loc[i]);CHKERRQ(ierr);
501447629fSBarry Smith     }
511447629fSBarry Smith 
521447629fSBarry Smith     /* recv and print off-processor's AO */
531447629fSBarry Smith     for (i=1; i<size; i++) {
541447629fSBarry Smith       len       = map->range[i+1] - map->range[i];
551447629fSBarry Smith       app_loc   = app  + map->range[i];
561447629fSBarry Smith       petsc_loc = petsc+ map->range[i];
571447629fSBarry Smith       ierr      = MPI_Recv(app_loc,(PetscMPIInt)len,MPIU_INT,i,tag_app,PetscObjectComm((PetscObject)ao),&status);CHKERRQ(ierr);
581447629fSBarry Smith       ierr      = MPI_Recv(petsc_loc,(PetscMPIInt)len,MPIU_INT,i,tag_petsc,PetscObjectComm((PetscObject)ao),&status);CHKERRQ(ierr);
591447629fSBarry Smith       ierr      = PetscViewerASCIIPrintf(viewer,"Process [%D]\n",i);CHKERRQ(ierr);
601447629fSBarry Smith       for (j=0; j<len; j++) {
611447629fSBarry Smith         ierr = PetscViewerASCIIPrintf(viewer,"%3D  %3D    %3D  %3D\n",map->range[i]+j,app_loc[j],map->range[i]+j,petsc_loc[j]);CHKERRQ(ierr);
621447629fSBarry Smith       }
631447629fSBarry Smith     }
641447629fSBarry Smith     ierr = PetscFree2(app,petsc);CHKERRQ(ierr);
651447629fSBarry Smith 
661447629fSBarry Smith   } else {
671447629fSBarry Smith     /* send values */
681447629fSBarry Smith     ierr = MPI_Send((void*)aomems->app_loc,map->n,MPIU_INT,0,tag_app,PetscObjectComm((PetscObject)ao));CHKERRQ(ierr);
691447629fSBarry Smith     ierr = MPI_Send((void*)aomems->petsc_loc,map->n,MPIU_INT,0,tag_petsc,PetscObjectComm((PetscObject)ao));CHKERRQ(ierr);
701447629fSBarry Smith   }
711447629fSBarry Smith   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
721447629fSBarry Smith   PetscFunctionReturn(0);
731447629fSBarry Smith }
741447629fSBarry Smith 
751447629fSBarry Smith PetscErrorCode AODestroy_MemoryScalable(AO ao)
761447629fSBarry Smith {
771447629fSBarry Smith   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
781447629fSBarry Smith   PetscErrorCode    ierr;
791447629fSBarry Smith 
801447629fSBarry Smith   PetscFunctionBegin;
811447629fSBarry Smith   ierr = PetscFree2(aomems->app_loc,aomems->petsc_loc);CHKERRQ(ierr);
821447629fSBarry Smith   ierr = PetscLayoutDestroy(&aomems->map);CHKERRQ(ierr);
831447629fSBarry Smith   ierr = PetscFree(aomems);CHKERRQ(ierr);
841447629fSBarry Smith   PetscFunctionReturn(0);
851447629fSBarry Smith }
861447629fSBarry Smith 
871447629fSBarry Smith /*
881447629fSBarry Smith    Input Parameters:
891447629fSBarry Smith +   ao - the application ordering context
901447629fSBarry Smith .   n  - the number of integers in ia[]
911447629fSBarry Smith .   ia - the integers; these are replaced with their mapped value
921447629fSBarry Smith -   maploc - app_loc or petsc_loc in struct "AO_MemoryScalable"
931447629fSBarry Smith 
941447629fSBarry Smith    Output Parameter:
951447629fSBarry Smith .   ia - the mapped interges
961447629fSBarry Smith  */
976bd6ae52SBarry Smith PetscErrorCode AOMap_MemoryScalable_private(AO ao,PetscInt n,PetscInt *ia,const PetscInt *maploc)
981447629fSBarry Smith {
991447629fSBarry Smith   PetscErrorCode    ierr;
1001447629fSBarry Smith   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
1011447629fSBarry Smith   MPI_Comm          comm;
1021447629fSBarry Smith   PetscMPIInt       rank,size,tag1,tag2;
10376ec1555SBarry Smith   PetscInt          *owner,*start,*sizes,nsends,nreceives;
1041447629fSBarry Smith   PetscInt          nmax,count,*sindices,*rindices,i,j,idx,lastidx,*sindices2,*rindices2;
1056bd6ae52SBarry Smith   const PetscInt    *owners = aomems->map->range;
1061447629fSBarry Smith   MPI_Request       *send_waits,*recv_waits,*send_waits2,*recv_waits2;
1071447629fSBarry Smith   MPI_Status        recv_status;
1081447629fSBarry Smith   PetscMPIInt       nindices,source,widx;
1091447629fSBarry Smith   PetscInt          *rbuf,*sbuf;
1101447629fSBarry Smith   MPI_Status        *send_status,*send_status2;
1111447629fSBarry Smith 
1121447629fSBarry Smith   PetscFunctionBegin;
1131447629fSBarry Smith   ierr = PetscObjectGetComm((PetscObject)ao,&comm);CHKERRQ(ierr);
1141447629fSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1151447629fSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1161447629fSBarry Smith 
1171447629fSBarry Smith   /*  first count number of contributors to each processor */
118037dbc42SBarry Smith   ierr = PetscMalloc2(2*size,&sizes,size,&start);CHKERRQ(ierr);
11976ec1555SBarry Smith   ierr = PetscMemzero(sizes,2*size*sizeof(PetscInt));CHKERRQ(ierr);
120f628708eSJed Brown   ierr = PetscCalloc1(n,&owner);CHKERRQ(ierr);
1211447629fSBarry Smith 
1221447629fSBarry Smith   j       = 0;
1231447629fSBarry Smith   lastidx = -1;
1241447629fSBarry Smith   for (i=0; i<n; i++) {
1256bd6ae52SBarry Smith     if (ia[i] < 0) owner[i] = -1; /* mark negative entries (which are not to be mapped) with a special negative value */
1266bd6ae52SBarry Smith     if (ia[i] >= ao->N) owner[i] = -2; /* mark out of range entries with special negative value */
1276bd6ae52SBarry Smith     else {
1281447629fSBarry Smith       /* if indices are NOT locally sorted, need to start search at the beginning */
1291447629fSBarry Smith       if (lastidx > (idx = ia[i])) j = 0;
1301447629fSBarry Smith       lastidx = idx;
1311447629fSBarry Smith       for (; j<size; j++) {
1321447629fSBarry Smith         if (idx >= owners[j] && idx < owners[j+1]) {
13376ec1555SBarry Smith           sizes[2*j]++;     /* num of indices to be sent */
13476ec1555SBarry Smith           sizes[2*j+1] = 1; /* send to proc[j] */
1351447629fSBarry Smith           owner[i]     = j;
1361447629fSBarry Smith           break;
1371447629fSBarry Smith         }
1381447629fSBarry Smith       }
1391447629fSBarry Smith     }
1406bd6ae52SBarry Smith   }
14176ec1555SBarry Smith   sizes[2*rank]=sizes[2*rank+1]=0; /* do not receive from self! */
1421447629fSBarry Smith   nsends        = 0;
14376ec1555SBarry Smith   for (i=0; i<size; i++) nsends += sizes[2*i+1];
1441447629fSBarry Smith 
1451447629fSBarry Smith   /* inform other processors of number of messages and max length*/
14676ec1555SBarry Smith   ierr = PetscMaxSum(comm,sizes,&nmax,&nreceives);CHKERRQ(ierr);
1471447629fSBarry Smith 
1481447629fSBarry Smith   /* allocate arrays */
1491447629fSBarry Smith   ierr = PetscObjectGetNewTag((PetscObject)ao,&tag1);CHKERRQ(ierr);
1501447629fSBarry Smith   ierr = PetscObjectGetNewTag((PetscObject)ao,&tag2);CHKERRQ(ierr);
1511447629fSBarry Smith 
152dcca6d9dSJed Brown   ierr = PetscMalloc2(nreceives*nmax,&rindices,nreceives,&recv_waits);CHKERRQ(ierr);
153dcca6d9dSJed Brown   ierr = PetscMalloc2(nsends*nmax,&rindices2,nsends,&recv_waits2);CHKERRQ(ierr);
1541447629fSBarry Smith 
155dcca6d9dSJed Brown   ierr = PetscMalloc3(n,&sindices,nsends,&send_waits,nsends,&send_status);CHKERRQ(ierr);
156dcca6d9dSJed Brown   ierr = PetscMalloc3(n,&sindices2,nreceives,&send_waits2,nreceives,&send_status2);CHKERRQ(ierr);
1571447629fSBarry Smith 
1581447629fSBarry Smith   /* post 1st receives: receive others requests
1591447629fSBarry Smith      since we don't know how long each individual message is we
1601447629fSBarry Smith      allocate the largest needed buffer for each receive. Potentially
1611447629fSBarry Smith      this is a lot of wasted space.
1621447629fSBarry Smith   */
1631447629fSBarry Smith   for (i=0,count=0; i<nreceives; i++) {
1641447629fSBarry Smith     ierr = MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);CHKERRQ(ierr);
1651447629fSBarry Smith   }
1661447629fSBarry Smith 
1671447629fSBarry Smith   /* do 1st sends:
1681447629fSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
1691447629fSBarry Smith          the ith processor
1701447629fSBarry Smith   */
1711447629fSBarry Smith   start[0] = 0;
17276ec1555SBarry Smith   for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
1731447629fSBarry Smith   for (i=0; i<n; i++) {
1741447629fSBarry Smith     j = owner[i];
1756bd6ae52SBarry Smith     if (j == -1) continue; /* do not remap negative entries in ia[] */
1766bd6ae52SBarry Smith     else if (j == -2) { /* out of range entries get mapped to -1 */
1776bd6ae52SBarry Smith       ia[i] = -1;
1786bd6ae52SBarry Smith       continue;
1796bd6ae52SBarry Smith     } else if (j != rank) {
1801447629fSBarry Smith       sindices[start[j]++]  = ia[i];
1811447629fSBarry Smith     } else { /* compute my own map */
1821447629fSBarry Smith       ia[i] = maploc[ia[i]-owners[rank]];
1831447629fSBarry Smith     }
1841447629fSBarry Smith   }
1851447629fSBarry Smith 
1861447629fSBarry Smith   start[0] = 0;
18776ec1555SBarry Smith   for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
1881447629fSBarry Smith   for (i=0,count=0; i<size; i++) {
18976ec1555SBarry Smith     if (sizes[2*i+1]) {
1901447629fSBarry Smith       /* send my request to others */
19176ec1555SBarry Smith       ierr = MPI_Isend(sindices+start[i],sizes[2*i],MPIU_INT,i,tag1,comm,send_waits+count);CHKERRQ(ierr);
1921447629fSBarry Smith       /* post receive for the answer of my request */
19376ec1555SBarry Smith       ierr = MPI_Irecv(sindices2+start[i],sizes[2*i],MPIU_INT,i,tag2,comm,recv_waits2+count);CHKERRQ(ierr);
1941447629fSBarry Smith       count++;
1951447629fSBarry Smith     }
1961447629fSBarry Smith   }
1971447629fSBarry Smith   if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %d != count %d",nsends,count);
1981447629fSBarry Smith 
1991447629fSBarry Smith   /* wait on 1st sends */
2001447629fSBarry Smith   if (nsends) {
2011447629fSBarry Smith     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
2021447629fSBarry Smith   }
2031447629fSBarry Smith 
2041447629fSBarry Smith   /* 1st recvs: other's requests */
2051447629fSBarry Smith   for (j=0; j< nreceives; j++) {
2061447629fSBarry Smith     ierr   = MPI_Waitany(nreceives,recv_waits,&widx,&recv_status);CHKERRQ(ierr); /* idx: index of handle for operation that completed */
2071447629fSBarry Smith     ierr   = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRQ(ierr);
2081447629fSBarry Smith     rbuf   = rindices+nmax*widx; /* global index */
2091447629fSBarry Smith     source = recv_status.MPI_SOURCE;
2101447629fSBarry Smith 
2111447629fSBarry Smith     /* compute mapping */
2121447629fSBarry Smith     sbuf = rbuf;
2131447629fSBarry Smith     for (i=0; i<nindices; i++) sbuf[i] = maploc[rbuf[i]-owners[rank]];
2141447629fSBarry Smith 
2151447629fSBarry Smith     /* send mapping back to the sender */
2161447629fSBarry Smith     ierr = MPI_Isend(sbuf,nindices,MPIU_INT,source,tag2,comm,send_waits2+widx);CHKERRQ(ierr);
2171447629fSBarry Smith   }
2181447629fSBarry Smith 
2191447629fSBarry Smith   /* wait on 2nd sends */
2201447629fSBarry Smith   if (nreceives) {
2211447629fSBarry Smith     ierr = MPI_Waitall(nreceives,send_waits2,send_status2);CHKERRQ(ierr);
2221447629fSBarry Smith   }
2231447629fSBarry Smith 
2241447629fSBarry Smith   /* 2nd recvs: for the answer of my request */
2251447629fSBarry Smith   for (j=0; j< nsends; j++) {
2261447629fSBarry Smith     ierr   = MPI_Waitany(nsends,recv_waits2,&widx,&recv_status);CHKERRQ(ierr);
2271447629fSBarry Smith     ierr   = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRQ(ierr);
2281447629fSBarry Smith     source = recv_status.MPI_SOURCE;
2291447629fSBarry Smith     /* pack output ia[] */
2301447629fSBarry Smith     rbuf  = sindices2+start[source];
2311447629fSBarry Smith     count = 0;
2321447629fSBarry Smith     for (i=0; i<n; i++) {
2331447629fSBarry Smith       if (source == owner[i]) ia[i] = rbuf[count++];
2341447629fSBarry Smith     }
2351447629fSBarry Smith   }
2361447629fSBarry Smith 
2371447629fSBarry Smith   /* free arrays */
23876ec1555SBarry Smith   ierr = PetscFree2(sizes,start);CHKERRQ(ierr);
2391447629fSBarry Smith   ierr = PetscFree(owner);CHKERRQ(ierr);
2401447629fSBarry Smith   ierr = PetscFree2(rindices,recv_waits);CHKERRQ(ierr);
2411447629fSBarry Smith   ierr = PetscFree2(rindices2,recv_waits2);CHKERRQ(ierr);
2421447629fSBarry Smith   ierr = PetscFree3(sindices,send_waits,send_status);CHKERRQ(ierr);
2431447629fSBarry Smith   ierr = PetscFree3(sindices2,send_waits2,send_status2);CHKERRQ(ierr);
2441447629fSBarry Smith   PetscFunctionReturn(0);
2451447629fSBarry Smith }
2461447629fSBarry Smith 
2471447629fSBarry Smith PetscErrorCode AOPetscToApplication_MemoryScalable(AO ao,PetscInt n,PetscInt *ia)
2481447629fSBarry Smith {
2491447629fSBarry Smith   PetscErrorCode    ierr;
2501447629fSBarry Smith   AO_MemoryScalable *aomems  = (AO_MemoryScalable*)ao->data;
2511447629fSBarry Smith   PetscInt          *app_loc = aomems->app_loc;
2521447629fSBarry Smith 
2531447629fSBarry Smith   PetscFunctionBegin;
2541447629fSBarry Smith   ierr = AOMap_MemoryScalable_private(ao,n,ia,app_loc);CHKERRQ(ierr);
2551447629fSBarry Smith   PetscFunctionReturn(0);
2561447629fSBarry Smith }
2571447629fSBarry Smith 
2581447629fSBarry Smith PetscErrorCode AOApplicationToPetsc_MemoryScalable(AO ao,PetscInt n,PetscInt *ia)
2591447629fSBarry Smith {
2601447629fSBarry Smith   PetscErrorCode    ierr;
2611447629fSBarry Smith   AO_MemoryScalable *aomems    = (AO_MemoryScalable*)ao->data;
2621447629fSBarry Smith   PetscInt          *petsc_loc = aomems->petsc_loc;
2631447629fSBarry Smith 
2641447629fSBarry Smith   PetscFunctionBegin;
2651447629fSBarry Smith   ierr = AOMap_MemoryScalable_private(ao,n,ia,petsc_loc);CHKERRQ(ierr);
2661447629fSBarry Smith   PetscFunctionReturn(0);
2671447629fSBarry Smith }
2681447629fSBarry Smith 
2691447629fSBarry Smith static struct _AOOps AOOps_MemoryScalable = {
2701447629fSBarry Smith   AOView_MemoryScalable,
2711447629fSBarry Smith   AODestroy_MemoryScalable,
2721447629fSBarry Smith   AOPetscToApplication_MemoryScalable,
2731447629fSBarry Smith   AOApplicationToPetsc_MemoryScalable,
2741447629fSBarry Smith   0,
2751447629fSBarry Smith   0,
2761447629fSBarry Smith   0,
2771447629fSBarry Smith   0
2781447629fSBarry Smith };
2791447629fSBarry Smith 
2801447629fSBarry Smith PetscErrorCode  AOCreateMemoryScalable_private(MPI_Comm comm,PetscInt napp,const PetscInt from_array[],const PetscInt to_array[],AO ao, PetscInt *aomap_loc)
2811447629fSBarry Smith {
2821447629fSBarry Smith   PetscErrorCode    ierr;
2831447629fSBarry Smith   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
2841447629fSBarry Smith   PetscLayout       map     = aomems->map;
2851447629fSBarry Smith   PetscInt          n_local = map->n,i,j;
2861447629fSBarry Smith   PetscMPIInt       rank,size,tag;
28776ec1555SBarry Smith   PetscInt          *owner,*start,*sizes,nsends,nreceives;
2881447629fSBarry Smith   PetscInt          nmax,count,*sindices,*rindices,idx,lastidx;
2891447629fSBarry Smith   PetscInt          *owners = aomems->map->range;
2901447629fSBarry Smith   MPI_Request       *send_waits,*recv_waits;
2911447629fSBarry Smith   MPI_Status        recv_status;
2921447629fSBarry Smith   PetscMPIInt       nindices,widx;
2931447629fSBarry Smith   PetscInt          *rbuf;
2941447629fSBarry Smith   PetscInt          n=napp,ip,ia;
2951447629fSBarry Smith   MPI_Status        *send_status;
2961447629fSBarry Smith 
2971447629fSBarry Smith   PetscFunctionBegin;
2981447629fSBarry Smith   ierr = PetscMemzero(aomap_loc,n_local*sizeof(PetscInt));CHKERRQ(ierr);
2991447629fSBarry Smith 
3001447629fSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3011447629fSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3021447629fSBarry Smith 
3031447629fSBarry Smith   /*  first count number of contributors (of from_array[]) to each processor */
304f628708eSJed Brown   ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
305f628708eSJed Brown   ierr = PetscMalloc1(n,&owner);CHKERRQ(ierr);
3061447629fSBarry Smith 
3071447629fSBarry Smith   j       = 0;
3081447629fSBarry Smith   lastidx = -1;
3091447629fSBarry Smith   for (i=0; i<n; i++) {
3101447629fSBarry Smith     /* if indices are NOT locally sorted, need to start search at the beginning */
3111447629fSBarry Smith     if (lastidx > (idx = from_array[i])) j = 0;
3121447629fSBarry Smith     lastidx = idx;
3131447629fSBarry Smith     for (; j<size; j++) {
3141447629fSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
31576ec1555SBarry Smith         sizes[2*j]  += 2; /* num of indices to be sent - in pairs (ip,ia) */
31676ec1555SBarry Smith         sizes[2*j+1] = 1; /* send to proc[j] */
3171447629fSBarry Smith         owner[i]      = j;
3181447629fSBarry Smith         break;
3191447629fSBarry Smith       }
3201447629fSBarry Smith     }
3211447629fSBarry Smith   }
32276ec1555SBarry Smith   sizes[2*rank]=sizes[2*rank+1]=0; /* do not receive from self! */
3231447629fSBarry Smith   nsends        = 0;
32476ec1555SBarry Smith   for (i=0; i<size; i++) nsends += sizes[2*i+1];
3251447629fSBarry Smith 
3261447629fSBarry Smith   /* inform other processors of number of messages and max length*/
32776ec1555SBarry Smith   ierr = PetscMaxSum(comm,sizes,&nmax,&nreceives);CHKERRQ(ierr);
3281447629fSBarry Smith 
3291447629fSBarry Smith   /* allocate arrays */
3301447629fSBarry Smith   ierr = PetscObjectGetNewTag((PetscObject)ao,&tag);CHKERRQ(ierr);
331dcca6d9dSJed Brown   ierr = PetscMalloc2(nreceives*nmax,&rindices,nreceives,&recv_waits);CHKERRQ(ierr);
332dcca6d9dSJed Brown   ierr = PetscMalloc3(2*n,&sindices,nsends,&send_waits,nsends,&send_status);CHKERRQ(ierr);
333785e854fSJed Brown   ierr = PetscMalloc1(size,&start);CHKERRQ(ierr);
3341447629fSBarry Smith 
3351447629fSBarry Smith   /* post receives: */
3361447629fSBarry Smith   for (i=0; i<nreceives; i++) {
3371447629fSBarry Smith     ierr = MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
3381447629fSBarry Smith   }
3391447629fSBarry Smith 
3401447629fSBarry Smith   /* do sends:
3411447629fSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3421447629fSBarry Smith          the ith processor
3431447629fSBarry Smith   */
3441447629fSBarry Smith   start[0] = 0;
34576ec1555SBarry Smith   for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
3461447629fSBarry Smith   for (i=0; i<n; i++) {
3471447629fSBarry Smith     j = owner[i];
3481447629fSBarry Smith     if (j != rank) {
3491447629fSBarry Smith       ip                   = from_array[i];
3501447629fSBarry Smith       ia                   = to_array[i];
3511447629fSBarry Smith       sindices[start[j]++] = ip;
3521447629fSBarry Smith       sindices[start[j]++] = ia;
3531447629fSBarry Smith     } else { /* compute my own map */
3541447629fSBarry Smith       ip            = from_array[i] - owners[rank];
3551447629fSBarry Smith       ia            = to_array[i];
3561447629fSBarry Smith       aomap_loc[ip] = ia;
3571447629fSBarry Smith     }
3581447629fSBarry Smith   }
3591447629fSBarry Smith 
3601447629fSBarry Smith   start[0] = 0;
36176ec1555SBarry Smith   for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
3621447629fSBarry Smith   for (i=0,count=0; i<size; i++) {
36376ec1555SBarry Smith     if (sizes[2*i+1]) {
36476ec1555SBarry Smith       ierr = MPI_Isend(sindices+start[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count);CHKERRQ(ierr);
3651447629fSBarry Smith       count++;
3661447629fSBarry Smith     }
3671447629fSBarry Smith   }
3681447629fSBarry Smith   if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %d != count %d",nsends,count);
3691447629fSBarry Smith 
3701447629fSBarry Smith   /* wait on sends */
3711447629fSBarry Smith   if (nsends) {
3721447629fSBarry Smith     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
3731447629fSBarry Smith   }
3741447629fSBarry Smith 
3751447629fSBarry Smith   /* recvs */
3761447629fSBarry Smith   count=0;
3771447629fSBarry Smith   for (j= nreceives; j>0; j--) {
3781447629fSBarry Smith     ierr = MPI_Waitany(nreceives,recv_waits,&widx,&recv_status);CHKERRQ(ierr);
3791447629fSBarry Smith     ierr = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRQ(ierr);
3801447629fSBarry Smith     rbuf = rindices+nmax*widx; /* global index */
3811447629fSBarry Smith 
3821447629fSBarry Smith     /* compute local mapping */
3831447629fSBarry Smith     for (i=0; i<nindices; i+=2) { /* pack aomap_loc */
3841447629fSBarry Smith       ip            = rbuf[i] - owners[rank]; /* local index */
3851447629fSBarry Smith       ia            = rbuf[i+1];
3861447629fSBarry Smith       aomap_loc[ip] = ia;
3871447629fSBarry Smith     }
3881447629fSBarry Smith     count++;
3891447629fSBarry Smith   }
3901447629fSBarry Smith 
3911447629fSBarry Smith   ierr = PetscFree(start);CHKERRQ(ierr);
3921447629fSBarry Smith   ierr = PetscFree3(sindices,send_waits,send_status);CHKERRQ(ierr);
3931447629fSBarry Smith   ierr = PetscFree2(rindices,recv_waits);CHKERRQ(ierr);
3941447629fSBarry Smith   ierr = PetscFree(owner);CHKERRQ(ierr);
39576ec1555SBarry Smith   ierr = PetscFree(sizes);CHKERRQ(ierr);
3961447629fSBarry Smith   PetscFunctionReturn(0);
3971447629fSBarry Smith }
3981447629fSBarry Smith 
3998cc058d9SJed Brown PETSC_EXTERN PetscErrorCode AOCreate_MemoryScalable(AO ao)
4001447629fSBarry Smith {
4011447629fSBarry Smith   PetscErrorCode    ierr;
4021447629fSBarry Smith   IS                isapp=ao->isapp,ispetsc=ao->ispetsc;
4031447629fSBarry Smith   const PetscInt    *mypetsc,*myapp;
4041447629fSBarry Smith   PetscInt          napp,n_local,N,i,start,*petsc,*lens,*disp;
4051447629fSBarry Smith   MPI_Comm          comm;
4061447629fSBarry Smith   AO_MemoryScalable *aomems;
4071447629fSBarry Smith   PetscLayout       map;
4081447629fSBarry Smith   PetscMPIInt       size,rank;
4091447629fSBarry Smith 
4101447629fSBarry Smith   PetscFunctionBegin;
41101e608bcSBarry Smith   if (!isapp) SETERRQ(PetscObjectComm((PetscObject)ao),PETSC_ERR_ARG_WRONGSTATE,"AOSetIS() must be called before AOSetType()");
4121447629fSBarry Smith   /* create special struct aomems */
413b00a9115SJed Brown   ierr     = PetscNewLog(ao,&aomems);CHKERRQ(ierr);
4141447629fSBarry Smith   ao->data = (void*) aomems;
4151447629fSBarry Smith   ierr     = PetscMemcpy(ao->ops,&AOOps_MemoryScalable,sizeof(struct _AOOps));CHKERRQ(ierr);
4161447629fSBarry Smith   ierr     = PetscObjectChangeTypeName((PetscObject)ao,AOMEMORYSCALABLE);CHKERRQ(ierr);
4171447629fSBarry Smith 
4181447629fSBarry Smith   /* transmit all local lengths of isapp to all processors */
4191447629fSBarry Smith   ierr = PetscObjectGetComm((PetscObject)isapp,&comm);CHKERRQ(ierr);
4201447629fSBarry Smith   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
4211447629fSBarry Smith   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
422dcca6d9dSJed Brown   ierr = PetscMalloc2(size,&lens,size,&disp);CHKERRQ(ierr);
4231447629fSBarry Smith   ierr = ISGetLocalSize(isapp,&napp);CHKERRQ(ierr);
4241447629fSBarry Smith   ierr = MPI_Allgather(&napp, 1, MPIU_INT, lens, 1, MPIU_INT, comm);CHKERRQ(ierr);
4251447629fSBarry Smith 
4261447629fSBarry Smith   N = 0;
4271447629fSBarry Smith   for (i = 0; i < size; i++) {
4281447629fSBarry Smith     disp[i] = N;
4291447629fSBarry Smith     N      += lens[i];
4301447629fSBarry Smith   }
4311447629fSBarry Smith 
4321447629fSBarry Smith   /* If ispetsc is 0 then use "natural" numbering */
4331447629fSBarry Smith   if (napp) {
4341447629fSBarry Smith     if (!ispetsc) {
4351447629fSBarry Smith       start = disp[rank];
436854ce69bSBarry Smith       ierr  = PetscMalloc1(napp+1, &petsc);CHKERRQ(ierr);
4371447629fSBarry Smith       for (i=0; i<napp; i++) petsc[i] = start + i;
4381447629fSBarry Smith     } else {
4391447629fSBarry Smith       ierr  = ISGetIndices(ispetsc,&mypetsc);CHKERRQ(ierr);
4401447629fSBarry Smith       petsc = (PetscInt*)mypetsc;
4411447629fSBarry Smith     }
4424a2f8832SBarry Smith   } else {
4434a2f8832SBarry Smith     petsc = NULL;
4441447629fSBarry Smith   }
4451447629fSBarry Smith 
4461447629fSBarry Smith   /* create a map with global size N - used to determine the local sizes of ao - shall we use local napp instead of N? */
4471447629fSBarry Smith   ierr    = PetscLayoutCreate(comm,&map);CHKERRQ(ierr);
4481447629fSBarry Smith   map->bs = 1;
4491447629fSBarry Smith   map->N  = N;
4501447629fSBarry Smith   ierr    = PetscLayoutSetUp(map);CHKERRQ(ierr);
4511447629fSBarry Smith 
4521447629fSBarry Smith   ao->N       = N;
4531447629fSBarry Smith   ao->n       = map->n;
4541447629fSBarry Smith   aomems->map = map;
4551447629fSBarry Smith 
4561447629fSBarry Smith   /* create distributed indices app_loc: petsc->app and petsc_loc: app->petsc */
4571447629fSBarry Smith   n_local = map->n;
458dcca6d9dSJed Brown   ierr    = PetscMalloc2(n_local, &aomems->app_loc,n_local,&aomems->petsc_loc);CHKERRQ(ierr);
4593bb1ff40SBarry Smith   ierr    = PetscLogObjectMemory((PetscObject)ao,2*n_local*sizeof(PetscInt));CHKERRQ(ierr);
4601447629fSBarry Smith   ierr    = PetscMemzero(aomems->app_loc,n_local*sizeof(PetscInt));CHKERRQ(ierr);
4611447629fSBarry Smith   ierr    = PetscMemzero(aomems->petsc_loc,n_local*sizeof(PetscInt));CHKERRQ(ierr);
4621447629fSBarry Smith   ierr    = ISGetIndices(isapp,&myapp);CHKERRQ(ierr);
4631447629fSBarry Smith 
4641447629fSBarry Smith   ierr = AOCreateMemoryScalable_private(comm,napp,petsc,myapp,ao,aomems->app_loc);CHKERRQ(ierr);
4651447629fSBarry Smith   ierr = AOCreateMemoryScalable_private(comm,napp,myapp,petsc,ao,aomems->petsc_loc);CHKERRQ(ierr);
4661447629fSBarry Smith 
4671447629fSBarry Smith   ierr = ISRestoreIndices(isapp,&myapp);CHKERRQ(ierr);
4681447629fSBarry Smith   if (napp) {
4691447629fSBarry Smith     if (ispetsc) {
4701447629fSBarry Smith       ierr = ISRestoreIndices(ispetsc,&mypetsc);CHKERRQ(ierr);
4711447629fSBarry Smith     } else {
4721447629fSBarry Smith       ierr = PetscFree(petsc);CHKERRQ(ierr);
4731447629fSBarry Smith     }
4741447629fSBarry Smith   }
4751447629fSBarry Smith   ierr = PetscFree2(lens,disp);CHKERRQ(ierr);
4761447629fSBarry Smith   PetscFunctionReturn(0);
4771447629fSBarry Smith }
4781447629fSBarry Smith 
4791447629fSBarry Smith /*@C
4801447629fSBarry Smith    AOCreateMemoryScalable - Creates a memory scalable application ordering using two integer arrays.
4811447629fSBarry Smith 
482*d083f849SBarry Smith    Collective
4831447629fSBarry Smith 
4841447629fSBarry Smith    Input Parameters:
4851447629fSBarry Smith +  comm - MPI communicator that is to share AO
4861447629fSBarry Smith .  napp - size of integer arrays
4871447629fSBarry Smith .  myapp - integer array that defines an ordering
4881447629fSBarry Smith -  mypetsc - integer array that defines another ordering (may be NULL to
4891447629fSBarry Smith              indicate the natural ordering, that is 0,1,2,3,...)
4901447629fSBarry Smith 
4911447629fSBarry Smith    Output Parameter:
4921447629fSBarry Smith .  aoout - the new application ordering
4931447629fSBarry Smith 
4941447629fSBarry Smith    Level: beginner
4951447629fSBarry Smith 
49695452b02SPatrick Sanan     Notes:
49795452b02SPatrick Sanan     The arrays myapp and mypetsc must contain the all the integers 0 to napp-1 with no duplicates; that is there cannot be any "holes"
4981447629fSBarry Smith            in the indices. Use AOCreateMapping() or AOCreateMappingIS() if you wish to have "holes" in the indices.
4991447629fSBarry Smith            Comparing with AOCreateBasic(), this routine trades memory with message communication.
5001447629fSBarry Smith 
5011447629fSBarry Smith .seealso: AOCreateMemoryScalableIS(), AODestroy(), AOPetscToApplication(), AOApplicationToPetsc()
5021447629fSBarry Smith @*/
5031447629fSBarry Smith PetscErrorCode AOCreateMemoryScalable(MPI_Comm comm,PetscInt napp,const PetscInt myapp[],const PetscInt mypetsc[],AO *aoout)
5041447629fSBarry Smith {
5051447629fSBarry Smith   PetscErrorCode ierr;
5061447629fSBarry Smith   IS             isapp,ispetsc;
5071447629fSBarry Smith   const PetscInt *app=myapp,*petsc=mypetsc;
5081447629fSBarry Smith 
5091447629fSBarry Smith   PetscFunctionBegin;
5101447629fSBarry Smith   ierr = ISCreateGeneral(comm,napp,app,PETSC_USE_POINTER,&isapp);CHKERRQ(ierr);
5111447629fSBarry Smith   if (mypetsc) {
5121447629fSBarry Smith     ierr = ISCreateGeneral(comm,napp,petsc,PETSC_USE_POINTER,&ispetsc);CHKERRQ(ierr);
5131447629fSBarry Smith   } else {
5141447629fSBarry Smith     ispetsc = NULL;
5151447629fSBarry Smith   }
5161447629fSBarry Smith   ierr = AOCreateMemoryScalableIS(isapp,ispetsc,aoout);CHKERRQ(ierr);
5171447629fSBarry Smith   ierr = ISDestroy(&isapp);CHKERRQ(ierr);
5181447629fSBarry Smith   if (mypetsc) {
5191447629fSBarry Smith     ierr = ISDestroy(&ispetsc);CHKERRQ(ierr);
5201447629fSBarry Smith   }
5211447629fSBarry Smith   PetscFunctionReturn(0);
5221447629fSBarry Smith }
5231447629fSBarry Smith 
5241447629fSBarry Smith /*@C
5251447629fSBarry Smith    AOCreateMemoryScalableIS - Creates a memory scalable application ordering using two index sets.
5261447629fSBarry Smith 
5271447629fSBarry Smith    Collective on IS
5281447629fSBarry Smith 
5291447629fSBarry Smith    Input Parameters:
5301447629fSBarry Smith +  isapp - index set that defines an ordering
5311447629fSBarry Smith -  ispetsc - index set that defines another ordering (may be NULL to use the
5321447629fSBarry Smith              natural ordering)
5331447629fSBarry Smith 
5341447629fSBarry Smith    Output Parameter:
5351447629fSBarry Smith .  aoout - the new application ordering
5361447629fSBarry Smith 
5371447629fSBarry Smith    Level: beginner
5381447629fSBarry Smith 
53995452b02SPatrick Sanan     Notes:
54095452b02SPatrick Sanan     The index sets isapp and ispetsc must contain the all the integers 0 to napp-1 (where napp is the length of the index sets) with no duplicates;
5411447629fSBarry Smith            that is there cannot be any "holes".
5421447629fSBarry Smith            Comparing with AOCreateBasicIS(), this routine trades memory with message communication.
5431447629fSBarry Smith .seealso: AOCreateMemoryScalable(),  AODestroy()
5441447629fSBarry Smith @*/
5451447629fSBarry Smith PetscErrorCode  AOCreateMemoryScalableIS(IS isapp,IS ispetsc,AO *aoout)
5461447629fSBarry Smith {
5471447629fSBarry Smith   PetscErrorCode ierr;
5481447629fSBarry Smith   MPI_Comm       comm;
5491447629fSBarry Smith   AO             ao;
5501447629fSBarry Smith 
5511447629fSBarry Smith   PetscFunctionBegin;
5521447629fSBarry Smith   ierr   = PetscObjectGetComm((PetscObject)isapp,&comm);CHKERRQ(ierr);
5531447629fSBarry Smith   ierr   = AOCreate(comm,&ao);CHKERRQ(ierr);
5541447629fSBarry Smith   ierr   = AOSetIS(ao,isapp,ispetsc);CHKERRQ(ierr);
5551447629fSBarry Smith   ierr   = AOSetType(ao,AOMEMORYSCALABLE);CHKERRQ(ierr);
556817ea411SJed Brown   ierr   = AOViewFromOptions(ao,NULL,"-ao_view");CHKERRQ(ierr);
5571447629fSBarry Smith   *aoout = ao;
5581447629fSBarry Smith   PetscFunctionReturn(0);
5591447629fSBarry Smith }
560