xref: /petsc/src/vec/is/ao/impls/memscalable/aomemscalable.c (revision 95452b02e12c0ee11232c7ff2b24b568a8e07e43)
11447629fSBarry Smith 
21447629fSBarry Smith /*
31447629fSBarry Smith     The memory scalable AO application ordering routines. These store the
41447629fSBarry Smith   local orderings on each processor.
51447629fSBarry Smith */
61447629fSBarry Smith 
71447629fSBarry Smith #include <../src/vec/is/ao/aoimpl.h>          /*I  "petscao.h"   I*/
81447629fSBarry Smith 
91447629fSBarry Smith typedef struct {
101447629fSBarry Smith   PetscInt    *app_loc;    /* app_loc[i] is the partner for the ith local PETSc slot */
111447629fSBarry Smith   PetscInt    *petsc_loc;  /* petsc_loc[j] is the partner for the jth local app slot */
121447629fSBarry Smith   PetscLayout map;         /* determines the local sizes of ao */
131447629fSBarry Smith } AO_MemoryScalable;
141447629fSBarry Smith 
151447629fSBarry Smith /*
161447629fSBarry Smith        All processors have the same data so processor 1 prints it
171447629fSBarry Smith */
181447629fSBarry Smith PetscErrorCode AOView_MemoryScalable(AO ao,PetscViewer viewer)
191447629fSBarry Smith {
201447629fSBarry Smith   PetscErrorCode    ierr;
211447629fSBarry Smith   PetscMPIInt       rank,size;
221447629fSBarry Smith   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
231447629fSBarry Smith   PetscBool         iascii;
241447629fSBarry Smith   PetscMPIInt       tag_app,tag_petsc;
251447629fSBarry Smith   PetscLayout       map = aomems->map;
261447629fSBarry Smith   PetscInt          *app,*app_loc,*petsc,*petsc_loc,len,i,j;
271447629fSBarry Smith   MPI_Status        status;
281447629fSBarry Smith 
291447629fSBarry Smith   PetscFunctionBegin;
301447629fSBarry Smith   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
311447629fSBarry Smith   if (!iascii) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not supported for AO MemoryScalable",((PetscObject)viewer)->type_name);
321447629fSBarry Smith 
331447629fSBarry Smith   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)ao),&rank);CHKERRQ(ierr);
341447629fSBarry Smith   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)ao),&size);CHKERRQ(ierr);
351447629fSBarry Smith 
361447629fSBarry Smith   ierr = PetscObjectGetNewTag((PetscObject)ao,&tag_app);CHKERRQ(ierr);
371447629fSBarry Smith   ierr = PetscObjectGetNewTag((PetscObject)ao,&tag_petsc);CHKERRQ(ierr);
381447629fSBarry Smith 
391447629fSBarry Smith   if (!rank) {
401447629fSBarry Smith     ierr = PetscViewerASCIIPrintf(viewer,"Number of elements in ordering %D\n",ao->N);CHKERRQ(ierr);
411447629fSBarry Smith     ierr = PetscViewerASCIIPrintf(viewer,  "PETSc->App  App->PETSc\n");CHKERRQ(ierr);
421447629fSBarry Smith 
43dcca6d9dSJed Brown     ierr = PetscMalloc2(map->N,&app,map->N,&petsc);CHKERRQ(ierr);
441447629fSBarry Smith     len  = map->n;
451447629fSBarry Smith     /* print local AO */
461447629fSBarry Smith     ierr = PetscViewerASCIIPrintf(viewer,"Process [%D]\n",rank);CHKERRQ(ierr);
471447629fSBarry Smith     for (i=0; i<len; i++) {
481447629fSBarry Smith       ierr = PetscViewerASCIIPrintf(viewer,"%3D  %3D    %3D  %3D\n",i,aomems->app_loc[i],i,aomems->petsc_loc[i]);CHKERRQ(ierr);
491447629fSBarry Smith     }
501447629fSBarry Smith 
511447629fSBarry Smith     /* recv and print off-processor's AO */
521447629fSBarry Smith     for (i=1; i<size; i++) {
531447629fSBarry Smith       len       = map->range[i+1] - map->range[i];
541447629fSBarry Smith       app_loc   = app  + map->range[i];
551447629fSBarry Smith       petsc_loc = petsc+ map->range[i];
561447629fSBarry Smith       ierr      = MPI_Recv(app_loc,(PetscMPIInt)len,MPIU_INT,i,tag_app,PetscObjectComm((PetscObject)ao),&status);CHKERRQ(ierr);
571447629fSBarry Smith       ierr      = MPI_Recv(petsc_loc,(PetscMPIInt)len,MPIU_INT,i,tag_petsc,PetscObjectComm((PetscObject)ao),&status);CHKERRQ(ierr);
581447629fSBarry Smith       ierr      = PetscViewerASCIIPrintf(viewer,"Process [%D]\n",i);CHKERRQ(ierr);
591447629fSBarry Smith       for (j=0; j<len; j++) {
601447629fSBarry Smith         ierr = PetscViewerASCIIPrintf(viewer,"%3D  %3D    %3D  %3D\n",map->range[i]+j,app_loc[j],map->range[i]+j,petsc_loc[j]);CHKERRQ(ierr);
611447629fSBarry Smith       }
621447629fSBarry Smith     }
631447629fSBarry Smith     ierr = PetscFree2(app,petsc);CHKERRQ(ierr);
641447629fSBarry Smith 
651447629fSBarry Smith   } else {
661447629fSBarry Smith     /* send values */
671447629fSBarry Smith     ierr = MPI_Send((void*)aomems->app_loc,map->n,MPIU_INT,0,tag_app,PetscObjectComm((PetscObject)ao));CHKERRQ(ierr);
681447629fSBarry Smith     ierr = MPI_Send((void*)aomems->petsc_loc,map->n,MPIU_INT,0,tag_petsc,PetscObjectComm((PetscObject)ao));CHKERRQ(ierr);
691447629fSBarry Smith   }
701447629fSBarry Smith   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
711447629fSBarry Smith   PetscFunctionReturn(0);
721447629fSBarry Smith }
731447629fSBarry Smith 
741447629fSBarry Smith PetscErrorCode AODestroy_MemoryScalable(AO ao)
751447629fSBarry Smith {
761447629fSBarry Smith   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
771447629fSBarry Smith   PetscErrorCode    ierr;
781447629fSBarry Smith 
791447629fSBarry Smith   PetscFunctionBegin;
801447629fSBarry Smith   ierr = PetscFree2(aomems->app_loc,aomems->petsc_loc);CHKERRQ(ierr);
811447629fSBarry Smith   ierr = PetscLayoutDestroy(&aomems->map);CHKERRQ(ierr);
821447629fSBarry Smith   ierr = PetscFree(aomems);CHKERRQ(ierr);
831447629fSBarry Smith   PetscFunctionReturn(0);
841447629fSBarry Smith }
851447629fSBarry Smith 
861447629fSBarry Smith /*
871447629fSBarry Smith    Input Parameters:
881447629fSBarry Smith +   ao - the application ordering context
891447629fSBarry Smith .   n  - the number of integers in ia[]
901447629fSBarry Smith .   ia - the integers; these are replaced with their mapped value
911447629fSBarry Smith -   maploc - app_loc or petsc_loc in struct "AO_MemoryScalable"
921447629fSBarry Smith 
931447629fSBarry Smith    Output Parameter:
941447629fSBarry Smith .   ia - the mapped interges
951447629fSBarry Smith  */
961447629fSBarry Smith PetscErrorCode AOMap_MemoryScalable_private(AO ao,PetscInt n,PetscInt *ia,PetscInt *maploc)
971447629fSBarry Smith {
981447629fSBarry Smith   PetscErrorCode    ierr;
991447629fSBarry Smith   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
1001447629fSBarry Smith   MPI_Comm          comm;
1011447629fSBarry Smith   PetscMPIInt       rank,size,tag1,tag2;
10276ec1555SBarry Smith   PetscInt          *owner,*start,*sizes,nsends,nreceives;
1031447629fSBarry Smith   PetscInt          nmax,count,*sindices,*rindices,i,j,idx,lastidx,*sindices2,*rindices2;
1041447629fSBarry Smith   PetscInt          *owners = aomems->map->range;
1051447629fSBarry Smith   MPI_Request       *send_waits,*recv_waits,*send_waits2,*recv_waits2;
1061447629fSBarry Smith   MPI_Status        recv_status;
1071447629fSBarry Smith   PetscMPIInt       nindices,source,widx;
1081447629fSBarry Smith   PetscInt          *rbuf,*sbuf;
1091447629fSBarry Smith   MPI_Status        *send_status,*send_status2;
1101447629fSBarry Smith 
1111447629fSBarry Smith   PetscFunctionBegin;
1121447629fSBarry Smith   ierr = PetscObjectGetComm((PetscObject)ao,&comm);CHKERRQ(ierr);
1131447629fSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1141447629fSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1151447629fSBarry Smith 
1161447629fSBarry Smith   /*  first count number of contributors to each processor */
117037dbc42SBarry Smith   ierr = PetscMalloc2(2*size,&sizes,size,&start);CHKERRQ(ierr);
11876ec1555SBarry Smith   ierr = PetscMemzero(sizes,2*size*sizeof(PetscInt));CHKERRQ(ierr);
119f628708eSJed Brown   ierr = PetscCalloc1(n,&owner);CHKERRQ(ierr);
1201447629fSBarry Smith 
1211447629fSBarry Smith   j       = 0;
1221447629fSBarry Smith   lastidx = -1;
1231447629fSBarry Smith   for (i=0; i<n; i++) {
1241447629fSBarry Smith     /* if indices are NOT locally sorted, need to start search at the beginning */
1251447629fSBarry Smith     if (lastidx > (idx = ia[i])) j = 0;
1261447629fSBarry Smith     lastidx = idx;
1271447629fSBarry Smith     for (; j<size; j++) {
1281447629fSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
12976ec1555SBarry Smith         sizes[2*j]++;     /* num of indices to be sent */
13076ec1555SBarry Smith         sizes[2*j+1] = 1; /* send to proc[j] */
1311447629fSBarry Smith         owner[i]      = j;
1321447629fSBarry Smith         break;
1331447629fSBarry Smith       }
1341447629fSBarry Smith     }
1351447629fSBarry Smith   }
13676ec1555SBarry Smith   sizes[2*rank]=sizes[2*rank+1]=0; /* do not receive from self! */
1371447629fSBarry Smith   nsends        = 0;
13876ec1555SBarry Smith   for (i=0; i<size; i++) nsends += sizes[2*i+1];
1391447629fSBarry Smith 
1401447629fSBarry Smith   /* inform other processors of number of messages and max length*/
14176ec1555SBarry Smith   ierr = PetscMaxSum(comm,sizes,&nmax,&nreceives);CHKERRQ(ierr);
1421447629fSBarry Smith 
1431447629fSBarry Smith   /* allocate arrays */
1441447629fSBarry Smith   ierr = PetscObjectGetNewTag((PetscObject)ao,&tag1);CHKERRQ(ierr);
1451447629fSBarry Smith   ierr = PetscObjectGetNewTag((PetscObject)ao,&tag2);CHKERRQ(ierr);
1461447629fSBarry Smith 
147dcca6d9dSJed Brown   ierr = PetscMalloc2(nreceives*nmax,&rindices,nreceives,&recv_waits);CHKERRQ(ierr);
148dcca6d9dSJed Brown   ierr = PetscMalloc2(nsends*nmax,&rindices2,nsends,&recv_waits2);CHKERRQ(ierr);
1491447629fSBarry Smith 
150dcca6d9dSJed Brown   ierr = PetscMalloc3(n,&sindices,nsends,&send_waits,nsends,&send_status);CHKERRQ(ierr);
151dcca6d9dSJed Brown   ierr = PetscMalloc3(n,&sindices2,nreceives,&send_waits2,nreceives,&send_status2);CHKERRQ(ierr);
1521447629fSBarry Smith 
1531447629fSBarry Smith   /* post 1st receives: receive others requests
1541447629fSBarry Smith      since we don't know how long each individual message is we
1551447629fSBarry Smith      allocate the largest needed buffer for each receive. Potentially
1561447629fSBarry Smith      this is a lot of wasted space.
1571447629fSBarry Smith   */
1581447629fSBarry Smith   for (i=0,count=0; i<nreceives; i++) {
1591447629fSBarry Smith     ierr = MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);CHKERRQ(ierr);
1601447629fSBarry Smith   }
1611447629fSBarry Smith 
1621447629fSBarry Smith   /* do 1st sends:
1631447629fSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
1641447629fSBarry Smith          the ith processor
1651447629fSBarry Smith   */
1661447629fSBarry Smith   start[0] = 0;
16776ec1555SBarry Smith   for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
1681447629fSBarry Smith   for (i=0; i<n; i++) {
1691447629fSBarry Smith     j = owner[i];
1701447629fSBarry Smith     if (j != rank) {
1711447629fSBarry Smith       sindices[start[j]++]  = ia[i];
1721447629fSBarry Smith     } else { /* compute my own map */
1731447629fSBarry Smith       if (ia[i] >= owners[rank] && ia[i] < owners[rank+1]) {
1741447629fSBarry Smith         ia[i] = maploc[ia[i]-owners[rank]];
1751447629fSBarry Smith       } else {
1761447629fSBarry Smith         ia[i] = -1;  /* ia[i] is not in the range of 0 and N-1, maps it to -1 */
1771447629fSBarry Smith       }
1781447629fSBarry Smith     }
1791447629fSBarry Smith   }
1801447629fSBarry Smith 
1811447629fSBarry Smith   start[0] = 0;
18276ec1555SBarry Smith   for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
1831447629fSBarry Smith   for (i=0,count=0; i<size; i++) {
18476ec1555SBarry Smith     if (sizes[2*i+1]) {
1851447629fSBarry Smith       /* send my request to others */
18676ec1555SBarry Smith       ierr = MPI_Isend(sindices+start[i],sizes[2*i],MPIU_INT,i,tag1,comm,send_waits+count);CHKERRQ(ierr);
1871447629fSBarry Smith       /* post receive for the answer of my request */
18876ec1555SBarry Smith       ierr = MPI_Irecv(sindices2+start[i],sizes[2*i],MPIU_INT,i,tag2,comm,recv_waits2+count);CHKERRQ(ierr);
1891447629fSBarry Smith       count++;
1901447629fSBarry Smith     }
1911447629fSBarry Smith   }
1921447629fSBarry Smith   if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %d != count %d",nsends,count);
1931447629fSBarry Smith 
1941447629fSBarry Smith   /* wait on 1st sends */
1951447629fSBarry Smith   if (nsends) {
1961447629fSBarry Smith     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1971447629fSBarry Smith   }
1981447629fSBarry Smith 
1991447629fSBarry Smith   /* 1st recvs: other's requests */
2001447629fSBarry Smith   for (j=0; j< nreceives; j++) {
2011447629fSBarry Smith     ierr   = MPI_Waitany(nreceives,recv_waits,&widx,&recv_status);CHKERRQ(ierr); /* idx: index of handle for operation that completed */
2021447629fSBarry Smith     ierr   = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRQ(ierr);
2031447629fSBarry Smith     rbuf   = rindices+nmax*widx; /* global index */
2041447629fSBarry Smith     source = recv_status.MPI_SOURCE;
2051447629fSBarry Smith 
2061447629fSBarry Smith     /* compute mapping */
2071447629fSBarry Smith     sbuf = rbuf;
2081447629fSBarry Smith     for (i=0; i<nindices; i++) sbuf[i] = maploc[rbuf[i]-owners[rank]];
2091447629fSBarry Smith 
2101447629fSBarry Smith     /* send mapping back to the sender */
2111447629fSBarry Smith     ierr = MPI_Isend(sbuf,nindices,MPIU_INT,source,tag2,comm,send_waits2+widx);CHKERRQ(ierr);
2121447629fSBarry Smith   }
2131447629fSBarry Smith 
2141447629fSBarry Smith   /* wait on 2nd sends */
2151447629fSBarry Smith   if (nreceives) {
2161447629fSBarry Smith     ierr = MPI_Waitall(nreceives,send_waits2,send_status2);CHKERRQ(ierr);
2171447629fSBarry Smith   }
2181447629fSBarry Smith 
2191447629fSBarry Smith   /* 2nd recvs: for the answer of my request */
2201447629fSBarry Smith   for (j=0; j< nsends; j++) {
2211447629fSBarry Smith     ierr   = MPI_Waitany(nsends,recv_waits2,&widx,&recv_status);CHKERRQ(ierr);
2221447629fSBarry Smith     ierr   = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRQ(ierr);
2231447629fSBarry Smith     source = recv_status.MPI_SOURCE;
2241447629fSBarry Smith     /* pack output ia[] */
2251447629fSBarry Smith     rbuf  = sindices2+start[source];
2261447629fSBarry Smith     count = 0;
2271447629fSBarry Smith     for (i=0; i<n; i++) {
2281447629fSBarry Smith       if (source == owner[i]) ia[i] = rbuf[count++];
2291447629fSBarry Smith     }
2301447629fSBarry Smith   }
2311447629fSBarry Smith 
2321447629fSBarry Smith   /* free arrays */
23376ec1555SBarry Smith   ierr = PetscFree2(sizes,start);CHKERRQ(ierr);
2341447629fSBarry Smith   ierr = PetscFree(owner);CHKERRQ(ierr);
2351447629fSBarry Smith   ierr = PetscFree2(rindices,recv_waits);CHKERRQ(ierr);
2361447629fSBarry Smith   ierr = PetscFree2(rindices2,recv_waits2);CHKERRQ(ierr);
2371447629fSBarry Smith   ierr = PetscFree3(sindices,send_waits,send_status);CHKERRQ(ierr);
2381447629fSBarry Smith   ierr = PetscFree3(sindices2,send_waits2,send_status2);CHKERRQ(ierr);
2391447629fSBarry Smith   PetscFunctionReturn(0);
2401447629fSBarry Smith }
2411447629fSBarry Smith 
2421447629fSBarry Smith PetscErrorCode AOPetscToApplication_MemoryScalable(AO ao,PetscInt n,PetscInt *ia)
2431447629fSBarry Smith {
2441447629fSBarry Smith   PetscErrorCode    ierr;
2451447629fSBarry Smith   AO_MemoryScalable *aomems  = (AO_MemoryScalable*)ao->data;
2461447629fSBarry Smith   PetscInt          *app_loc = aomems->app_loc;
2471447629fSBarry Smith 
2481447629fSBarry Smith   PetscFunctionBegin;
2491447629fSBarry Smith   ierr = AOMap_MemoryScalable_private(ao,n,ia,app_loc);CHKERRQ(ierr);
2501447629fSBarry Smith   PetscFunctionReturn(0);
2511447629fSBarry Smith }
2521447629fSBarry Smith 
2531447629fSBarry Smith PetscErrorCode AOApplicationToPetsc_MemoryScalable(AO ao,PetscInt n,PetscInt *ia)
2541447629fSBarry Smith {
2551447629fSBarry Smith   PetscErrorCode    ierr;
2561447629fSBarry Smith   AO_MemoryScalable *aomems    = (AO_MemoryScalable*)ao->data;
2571447629fSBarry Smith   PetscInt          *petsc_loc = aomems->petsc_loc;
2581447629fSBarry Smith 
2591447629fSBarry Smith   PetscFunctionBegin;
2601447629fSBarry Smith   ierr = AOMap_MemoryScalable_private(ao,n,ia,petsc_loc);CHKERRQ(ierr);
2611447629fSBarry Smith   PetscFunctionReturn(0);
2621447629fSBarry Smith }
2631447629fSBarry Smith 
2641447629fSBarry Smith static struct _AOOps AOOps_MemoryScalable = {
2651447629fSBarry Smith   AOView_MemoryScalable,
2661447629fSBarry Smith   AODestroy_MemoryScalable,
2671447629fSBarry Smith   AOPetscToApplication_MemoryScalable,
2681447629fSBarry Smith   AOApplicationToPetsc_MemoryScalable,
2691447629fSBarry Smith   0,
2701447629fSBarry Smith   0,
2711447629fSBarry Smith   0,
2721447629fSBarry Smith   0
2731447629fSBarry Smith };
2741447629fSBarry Smith 
2751447629fSBarry Smith PetscErrorCode  AOCreateMemoryScalable_private(MPI_Comm comm,PetscInt napp,const PetscInt from_array[],const PetscInt to_array[],AO ao, PetscInt *aomap_loc)
2761447629fSBarry Smith {
2771447629fSBarry Smith   PetscErrorCode    ierr;
2781447629fSBarry Smith   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
2791447629fSBarry Smith   PetscLayout       map     = aomems->map;
2801447629fSBarry Smith   PetscInt          n_local = map->n,i,j;
2811447629fSBarry Smith   PetscMPIInt       rank,size,tag;
28276ec1555SBarry Smith   PetscInt          *owner,*start,*sizes,nsends,nreceives;
2831447629fSBarry Smith   PetscInt          nmax,count,*sindices,*rindices,idx,lastidx;
2841447629fSBarry Smith   PetscInt          *owners = aomems->map->range;
2851447629fSBarry Smith   MPI_Request       *send_waits,*recv_waits;
2861447629fSBarry Smith   MPI_Status        recv_status;
2871447629fSBarry Smith   PetscMPIInt       nindices,widx;
2881447629fSBarry Smith   PetscInt          *rbuf;
2891447629fSBarry Smith   PetscInt          n=napp,ip,ia;
2901447629fSBarry Smith   MPI_Status        *send_status;
2911447629fSBarry Smith 
2921447629fSBarry Smith   PetscFunctionBegin;
2931447629fSBarry Smith   ierr = PetscMemzero(aomap_loc,n_local*sizeof(PetscInt));CHKERRQ(ierr);
2941447629fSBarry Smith 
2951447629fSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2961447629fSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2971447629fSBarry Smith 
2981447629fSBarry Smith   /*  first count number of contributors (of from_array[]) to each processor */
299f628708eSJed Brown   ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
300f628708eSJed Brown   ierr = PetscMalloc1(n,&owner);CHKERRQ(ierr);
3011447629fSBarry Smith 
3021447629fSBarry Smith   j       = 0;
3031447629fSBarry Smith   lastidx = -1;
3041447629fSBarry Smith   for (i=0; i<n; i++) {
3051447629fSBarry Smith     /* if indices are NOT locally sorted, need to start search at the beginning */
3061447629fSBarry Smith     if (lastidx > (idx = from_array[i])) j = 0;
3071447629fSBarry Smith     lastidx = idx;
3081447629fSBarry Smith     for (; j<size; j++) {
3091447629fSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
31076ec1555SBarry Smith         sizes[2*j]  += 2; /* num of indices to be sent - in pairs (ip,ia) */
31176ec1555SBarry Smith         sizes[2*j+1] = 1; /* send to proc[j] */
3121447629fSBarry Smith         owner[i]      = j;
3131447629fSBarry Smith         break;
3141447629fSBarry Smith       }
3151447629fSBarry Smith     }
3161447629fSBarry Smith   }
31776ec1555SBarry Smith   sizes[2*rank]=sizes[2*rank+1]=0; /* do not receive from self! */
3181447629fSBarry Smith   nsends        = 0;
31976ec1555SBarry Smith   for (i=0; i<size; i++) nsends += sizes[2*i+1];
3201447629fSBarry Smith 
3211447629fSBarry Smith   /* inform other processors of number of messages and max length*/
32276ec1555SBarry Smith   ierr = PetscMaxSum(comm,sizes,&nmax,&nreceives);CHKERRQ(ierr);
3231447629fSBarry Smith 
3241447629fSBarry Smith   /* allocate arrays */
3251447629fSBarry Smith   ierr = PetscObjectGetNewTag((PetscObject)ao,&tag);CHKERRQ(ierr);
326dcca6d9dSJed Brown   ierr = PetscMalloc2(nreceives*nmax,&rindices,nreceives,&recv_waits);CHKERRQ(ierr);
327dcca6d9dSJed Brown   ierr = PetscMalloc3(2*n,&sindices,nsends,&send_waits,nsends,&send_status);CHKERRQ(ierr);
328785e854fSJed Brown   ierr = PetscMalloc1(size,&start);CHKERRQ(ierr);
3291447629fSBarry Smith 
3301447629fSBarry Smith   /* post receives: */
3311447629fSBarry Smith   for (i=0; i<nreceives; i++) {
3321447629fSBarry Smith     ierr = MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
3331447629fSBarry Smith   }
3341447629fSBarry Smith 
3351447629fSBarry Smith   /* do sends:
3361447629fSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
3371447629fSBarry Smith          the ith processor
3381447629fSBarry Smith   */
3391447629fSBarry Smith   start[0] = 0;
34076ec1555SBarry Smith   for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
3411447629fSBarry Smith   for (i=0; i<n; i++) {
3421447629fSBarry Smith     j = owner[i];
3431447629fSBarry Smith     if (j != rank) {
3441447629fSBarry Smith       ip                   = from_array[i];
3451447629fSBarry Smith       ia                   = to_array[i];
3461447629fSBarry Smith       sindices[start[j]++] = ip;
3471447629fSBarry Smith       sindices[start[j]++] = ia;
3481447629fSBarry Smith     } else { /* compute my own map */
3491447629fSBarry Smith       ip            = from_array[i] - owners[rank];
3501447629fSBarry Smith       ia            = to_array[i];
3511447629fSBarry Smith       aomap_loc[ip] = ia;
3521447629fSBarry Smith     }
3531447629fSBarry Smith   }
3541447629fSBarry Smith 
3551447629fSBarry Smith   start[0] = 0;
35676ec1555SBarry Smith   for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
3571447629fSBarry Smith   for (i=0,count=0; i<size; i++) {
35876ec1555SBarry Smith     if (sizes[2*i+1]) {
35976ec1555SBarry Smith       ierr = MPI_Isend(sindices+start[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count);CHKERRQ(ierr);
3601447629fSBarry Smith       count++;
3611447629fSBarry Smith     }
3621447629fSBarry Smith   }
3631447629fSBarry Smith   if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %d != count %d",nsends,count);
3641447629fSBarry Smith 
3651447629fSBarry Smith   /* wait on sends */
3661447629fSBarry Smith   if (nsends) {
3671447629fSBarry Smith     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
3681447629fSBarry Smith   }
3691447629fSBarry Smith 
3701447629fSBarry Smith   /* recvs */
3711447629fSBarry Smith   count=0;
3721447629fSBarry Smith   for (j= nreceives; j>0; j--) {
3731447629fSBarry Smith     ierr = MPI_Waitany(nreceives,recv_waits,&widx,&recv_status);CHKERRQ(ierr);
3741447629fSBarry Smith     ierr = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRQ(ierr);
3751447629fSBarry Smith     rbuf = rindices+nmax*widx; /* global index */
3761447629fSBarry Smith 
3771447629fSBarry Smith     /* compute local mapping */
3781447629fSBarry Smith     for (i=0; i<nindices; i+=2) { /* pack aomap_loc */
3791447629fSBarry Smith       ip            = rbuf[i] - owners[rank]; /* local index */
3801447629fSBarry Smith       ia            = rbuf[i+1];
3811447629fSBarry Smith       aomap_loc[ip] = ia;
3821447629fSBarry Smith     }
3831447629fSBarry Smith     count++;
3841447629fSBarry Smith   }
3851447629fSBarry Smith 
3861447629fSBarry Smith   ierr = PetscFree(start);CHKERRQ(ierr);
3871447629fSBarry Smith   ierr = PetscFree3(sindices,send_waits,send_status);CHKERRQ(ierr);
3881447629fSBarry Smith   ierr = PetscFree2(rindices,recv_waits);CHKERRQ(ierr);
3891447629fSBarry Smith   ierr = PetscFree(owner);CHKERRQ(ierr);
39076ec1555SBarry Smith   ierr = PetscFree(sizes);CHKERRQ(ierr);
3911447629fSBarry Smith   PetscFunctionReturn(0);
3921447629fSBarry Smith }
3931447629fSBarry Smith 
3948cc058d9SJed Brown PETSC_EXTERN PetscErrorCode AOCreate_MemoryScalable(AO ao)
3951447629fSBarry Smith {
3961447629fSBarry Smith   PetscErrorCode    ierr;
3971447629fSBarry Smith   IS                isapp=ao->isapp,ispetsc=ao->ispetsc;
3981447629fSBarry Smith   const PetscInt    *mypetsc,*myapp;
3991447629fSBarry Smith   PetscInt          napp,n_local,N,i,start,*petsc,*lens,*disp;
4001447629fSBarry Smith   MPI_Comm          comm;
4011447629fSBarry Smith   AO_MemoryScalable *aomems;
4021447629fSBarry Smith   PetscLayout       map;
4031447629fSBarry Smith   PetscMPIInt       size,rank;
4041447629fSBarry Smith 
4051447629fSBarry Smith   PetscFunctionBegin;
40601e608bcSBarry Smith   if (!isapp) SETERRQ(PetscObjectComm((PetscObject)ao),PETSC_ERR_ARG_WRONGSTATE,"AOSetIS() must be called before AOSetType()");
4071447629fSBarry Smith   /* create special struct aomems */
408b00a9115SJed Brown   ierr     = PetscNewLog(ao,&aomems);CHKERRQ(ierr);
4091447629fSBarry Smith   ao->data = (void*) aomems;
4101447629fSBarry Smith   ierr     = PetscMemcpy(ao->ops,&AOOps_MemoryScalable,sizeof(struct _AOOps));CHKERRQ(ierr);
4111447629fSBarry Smith   ierr     = PetscObjectChangeTypeName((PetscObject)ao,AOMEMORYSCALABLE);CHKERRQ(ierr);
4121447629fSBarry Smith 
4131447629fSBarry Smith   /* transmit all local lengths of isapp to all processors */
4141447629fSBarry Smith   ierr = PetscObjectGetComm((PetscObject)isapp,&comm);CHKERRQ(ierr);
4151447629fSBarry Smith   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
4161447629fSBarry Smith   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
417dcca6d9dSJed Brown   ierr = PetscMalloc2(size,&lens,size,&disp);CHKERRQ(ierr);
4181447629fSBarry Smith   ierr = ISGetLocalSize(isapp,&napp);CHKERRQ(ierr);
4191447629fSBarry Smith   ierr = MPI_Allgather(&napp, 1, MPIU_INT, lens, 1, MPIU_INT, comm);CHKERRQ(ierr);
4201447629fSBarry Smith 
4211447629fSBarry Smith   N = 0;
4221447629fSBarry Smith   for (i = 0; i < size; i++) {
4231447629fSBarry Smith     disp[i] = N;
4241447629fSBarry Smith     N      += lens[i];
4251447629fSBarry Smith   }
4261447629fSBarry Smith 
4271447629fSBarry Smith   /* If ispetsc is 0 then use "natural" numbering */
4281447629fSBarry Smith   if (napp) {
4291447629fSBarry Smith     if (!ispetsc) {
4301447629fSBarry Smith       start = disp[rank];
431854ce69bSBarry Smith       ierr  = PetscMalloc1(napp+1, &petsc);CHKERRQ(ierr);
4321447629fSBarry Smith       for (i=0; i<napp; i++) petsc[i] = start + i;
4331447629fSBarry Smith     } else {
4341447629fSBarry Smith       ierr  = ISGetIndices(ispetsc,&mypetsc);CHKERRQ(ierr);
4351447629fSBarry Smith       petsc = (PetscInt*)mypetsc;
4361447629fSBarry Smith     }
4374a2f8832SBarry Smith   } else {
4384a2f8832SBarry Smith     petsc = NULL;
4391447629fSBarry Smith   }
4401447629fSBarry Smith 
4411447629fSBarry Smith   /* create a map with global size N - used to determine the local sizes of ao - shall we use local napp instead of N? */
4421447629fSBarry Smith   ierr    = PetscLayoutCreate(comm,&map);CHKERRQ(ierr);
4431447629fSBarry Smith   map->bs = 1;
4441447629fSBarry Smith   map->N  = N;
4451447629fSBarry Smith   ierr    = PetscLayoutSetUp(map);CHKERRQ(ierr);
4461447629fSBarry Smith 
4471447629fSBarry Smith   ao->N       = N;
4481447629fSBarry Smith   ao->n       = map->n;
4491447629fSBarry Smith   aomems->map = map;
4501447629fSBarry Smith 
4511447629fSBarry Smith   /* create distributed indices app_loc: petsc->app and petsc_loc: app->petsc */
4521447629fSBarry Smith   n_local = map->n;
453dcca6d9dSJed Brown   ierr    = PetscMalloc2(n_local, &aomems->app_loc,n_local,&aomems->petsc_loc);CHKERRQ(ierr);
4543bb1ff40SBarry Smith   ierr    = PetscLogObjectMemory((PetscObject)ao,2*n_local*sizeof(PetscInt));CHKERRQ(ierr);
4551447629fSBarry Smith   ierr    = PetscMemzero(aomems->app_loc,n_local*sizeof(PetscInt));CHKERRQ(ierr);
4561447629fSBarry Smith   ierr    = PetscMemzero(aomems->petsc_loc,n_local*sizeof(PetscInt));CHKERRQ(ierr);
4571447629fSBarry Smith   ierr    = ISGetIndices(isapp,&myapp);CHKERRQ(ierr);
4581447629fSBarry Smith 
4591447629fSBarry Smith   ierr = AOCreateMemoryScalable_private(comm,napp,petsc,myapp,ao,aomems->app_loc);CHKERRQ(ierr);
4601447629fSBarry Smith   ierr = AOCreateMemoryScalable_private(comm,napp,myapp,petsc,ao,aomems->petsc_loc);CHKERRQ(ierr);
4611447629fSBarry Smith 
4621447629fSBarry Smith   ierr = ISRestoreIndices(isapp,&myapp);CHKERRQ(ierr);
4631447629fSBarry Smith   if (napp) {
4641447629fSBarry Smith     if (ispetsc) {
4651447629fSBarry Smith       ierr = ISRestoreIndices(ispetsc,&mypetsc);CHKERRQ(ierr);
4661447629fSBarry Smith     } else {
4671447629fSBarry Smith       ierr = PetscFree(petsc);CHKERRQ(ierr);
4681447629fSBarry Smith     }
4691447629fSBarry Smith   }
4701447629fSBarry Smith   ierr = PetscFree2(lens,disp);CHKERRQ(ierr);
4711447629fSBarry Smith   PetscFunctionReturn(0);
4721447629fSBarry Smith }
4731447629fSBarry Smith 
4741447629fSBarry Smith /*@C
4751447629fSBarry Smith    AOCreateMemoryScalable - Creates a memory scalable application ordering using two integer arrays.
4761447629fSBarry Smith 
4771447629fSBarry Smith    Collective on MPI_Comm
4781447629fSBarry Smith 
4791447629fSBarry Smith    Input Parameters:
4801447629fSBarry Smith +  comm - MPI communicator that is to share AO
4811447629fSBarry Smith .  napp - size of integer arrays
4821447629fSBarry Smith .  myapp - integer array that defines an ordering
4831447629fSBarry Smith -  mypetsc - integer array that defines another ordering (may be NULL to
4841447629fSBarry Smith              indicate the natural ordering, that is 0,1,2,3,...)
4851447629fSBarry Smith 
4861447629fSBarry Smith    Output Parameter:
4871447629fSBarry Smith .  aoout - the new application ordering
4881447629fSBarry Smith 
4891447629fSBarry Smith    Level: beginner
4901447629fSBarry Smith 
491*95452b02SPatrick Sanan     Notes:
492*95452b02SPatrick Sanan     The arrays myapp and mypetsc must contain the all the integers 0 to napp-1 with no duplicates; that is there cannot be any "holes"
4931447629fSBarry Smith            in the indices. Use AOCreateMapping() or AOCreateMappingIS() if you wish to have "holes" in the indices.
4941447629fSBarry Smith            Comparing with AOCreateBasic(), this routine trades memory with message communication.
4951447629fSBarry Smith 
4961447629fSBarry Smith .keywords: AO, create
4971447629fSBarry Smith 
4981447629fSBarry Smith .seealso: AOCreateMemoryScalableIS(), AODestroy(), AOPetscToApplication(), AOApplicationToPetsc()
4991447629fSBarry Smith @*/
5001447629fSBarry Smith PetscErrorCode AOCreateMemoryScalable(MPI_Comm comm,PetscInt napp,const PetscInt myapp[],const PetscInt mypetsc[],AO *aoout)
5011447629fSBarry Smith {
5021447629fSBarry Smith   PetscErrorCode ierr;
5031447629fSBarry Smith   IS             isapp,ispetsc;
5041447629fSBarry Smith   const PetscInt *app=myapp,*petsc=mypetsc;
5051447629fSBarry Smith 
5061447629fSBarry Smith   PetscFunctionBegin;
5071447629fSBarry Smith   ierr = ISCreateGeneral(comm,napp,app,PETSC_USE_POINTER,&isapp);CHKERRQ(ierr);
5081447629fSBarry Smith   if (mypetsc) {
5091447629fSBarry Smith     ierr = ISCreateGeneral(comm,napp,petsc,PETSC_USE_POINTER,&ispetsc);CHKERRQ(ierr);
5101447629fSBarry Smith   } else {
5111447629fSBarry Smith     ispetsc = NULL;
5121447629fSBarry Smith   }
5131447629fSBarry Smith   ierr = AOCreateMemoryScalableIS(isapp,ispetsc,aoout);CHKERRQ(ierr);
5141447629fSBarry Smith   ierr = ISDestroy(&isapp);CHKERRQ(ierr);
5151447629fSBarry Smith   if (mypetsc) {
5161447629fSBarry Smith     ierr = ISDestroy(&ispetsc);CHKERRQ(ierr);
5171447629fSBarry Smith   }
5181447629fSBarry Smith   PetscFunctionReturn(0);
5191447629fSBarry Smith }
5201447629fSBarry Smith 
5211447629fSBarry Smith /*@C
5221447629fSBarry Smith    AOCreateMemoryScalableIS - Creates a memory scalable application ordering using two index sets.
5231447629fSBarry Smith 
5241447629fSBarry Smith    Collective on IS
5251447629fSBarry Smith 
5261447629fSBarry Smith    Input Parameters:
5271447629fSBarry Smith +  isapp - index set that defines an ordering
5281447629fSBarry Smith -  ispetsc - index set that defines another ordering (may be NULL to use the
5291447629fSBarry Smith              natural ordering)
5301447629fSBarry Smith 
5311447629fSBarry Smith    Output Parameter:
5321447629fSBarry Smith .  aoout - the new application ordering
5331447629fSBarry Smith 
5341447629fSBarry Smith    Level: beginner
5351447629fSBarry Smith 
536*95452b02SPatrick Sanan     Notes:
537*95452b02SPatrick Sanan     The index sets isapp and ispetsc must contain the all the integers 0 to napp-1 (where napp is the length of the index sets) with no duplicates;
5381447629fSBarry Smith            that is there cannot be any "holes".
5391447629fSBarry Smith            Comparing with AOCreateBasicIS(), this routine trades memory with message communication.
5401447629fSBarry Smith .keywords: AO, create
5411447629fSBarry Smith 
5421447629fSBarry Smith .seealso: AOCreateMemoryScalable(),  AODestroy()
5431447629fSBarry Smith @*/
5441447629fSBarry Smith PetscErrorCode  AOCreateMemoryScalableIS(IS isapp,IS ispetsc,AO *aoout)
5451447629fSBarry Smith {
5461447629fSBarry Smith   PetscErrorCode ierr;
5471447629fSBarry Smith   MPI_Comm       comm;
5481447629fSBarry Smith   AO             ao;
5491447629fSBarry Smith 
5501447629fSBarry Smith   PetscFunctionBegin;
5511447629fSBarry Smith   ierr   = PetscObjectGetComm((PetscObject)isapp,&comm);CHKERRQ(ierr);
5521447629fSBarry Smith   ierr   = AOCreate(comm,&ao);CHKERRQ(ierr);
5531447629fSBarry Smith   ierr   = AOSetIS(ao,isapp,ispetsc);CHKERRQ(ierr);
5541447629fSBarry Smith   ierr   = AOSetType(ao,AOMEMORYSCALABLE);CHKERRQ(ierr);
555817ea411SJed Brown   ierr   = AOViewFromOptions(ao,NULL,"-ao_view");CHKERRQ(ierr);
5561447629fSBarry Smith   *aoout = ao;
5571447629fSBarry Smith   PetscFunctionReturn(0);
5581447629fSBarry Smith }
559