11447629fSBarry Smith 21447629fSBarry Smith /* 31447629fSBarry Smith The memory scalable AO application ordering routines. These store the 46bd6ae52SBarry Smith orderings on each processor for that processor's range of values 51447629fSBarry Smith */ 61447629fSBarry Smith 71447629fSBarry Smith #include <../src/vec/is/ao/aoimpl.h> /*I "petscao.h" I*/ 81447629fSBarry Smith 91447629fSBarry Smith typedef struct { 101447629fSBarry Smith PetscInt *app_loc; /* app_loc[i] is the partner for the ith local PETSc slot */ 111447629fSBarry Smith PetscInt *petsc_loc; /* petsc_loc[j] is the partner for the jth local app slot */ 121447629fSBarry Smith PetscLayout map; /* determines the local sizes of ao */ 131447629fSBarry Smith } AO_MemoryScalable; 141447629fSBarry Smith 151447629fSBarry Smith /* 166bd6ae52SBarry Smith All processors ship the data to process 0 to be printed; note that this is not scalable because 176bd6ae52SBarry Smith process 0 allocates space for all the orderings entry across all the processes 181447629fSBarry Smith */ 191447629fSBarry Smith PetscErrorCode AOView_MemoryScalable(AO ao,PetscViewer viewer) 201447629fSBarry Smith { 211447629fSBarry Smith PetscErrorCode ierr; 221447629fSBarry Smith PetscMPIInt rank,size; 231447629fSBarry Smith AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data; 241447629fSBarry Smith PetscBool iascii; 251447629fSBarry Smith PetscMPIInt tag_app,tag_petsc; 261447629fSBarry Smith PetscLayout map = aomems->map; 271447629fSBarry Smith PetscInt *app,*app_loc,*petsc,*petsc_loc,len,i,j; 281447629fSBarry Smith MPI_Status status; 291447629fSBarry Smith 301447629fSBarry Smith PetscFunctionBegin; 311447629fSBarry Smith ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 32*2c71b3e2SJacob Faibussowitsch PetscCheckFalse(!iascii,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not supported for AO MemoryScalable",((PetscObject)viewer)->type_name); 331447629fSBarry Smith 34ffc4695bSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)ao),&rank);CHKERRMPI(ierr); 35ffc4695bSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)ao),&size);CHKERRMPI(ierr); 361447629fSBarry Smith 371447629fSBarry Smith ierr = PetscObjectGetNewTag((PetscObject)ao,&tag_app);CHKERRQ(ierr); 381447629fSBarry Smith ierr = PetscObjectGetNewTag((PetscObject)ao,&tag_petsc);CHKERRQ(ierr); 391447629fSBarry Smith 40dd400576SPatrick Sanan if (rank == 0) { 412abc8c78SJacob Faibussowitsch ierr = PetscViewerASCIIPrintf(viewer,"Number of elements in ordering %" PetscInt_FMT "\n",ao->N);CHKERRQ(ierr); 421447629fSBarry Smith ierr = PetscViewerASCIIPrintf(viewer, "PETSc->App App->PETSc\n");CHKERRQ(ierr); 431447629fSBarry Smith 44dcca6d9dSJed Brown ierr = PetscMalloc2(map->N,&app,map->N,&petsc);CHKERRQ(ierr); 451447629fSBarry Smith len = map->n; 461447629fSBarry Smith /* print local AO */ 476bd6ae52SBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"Process [%d]\n",rank);CHKERRQ(ierr); 481447629fSBarry Smith for (i=0; i<len; i++) { 492abc8c78SJacob Faibussowitsch ierr = PetscViewerASCIIPrintf(viewer,"%3" PetscInt_FMT " %3" PetscInt_FMT " %3" PetscInt_FMT " %3" PetscInt_FMT "\n",i,aomems->app_loc[i],i,aomems->petsc_loc[i]);CHKERRQ(ierr); 501447629fSBarry Smith } 511447629fSBarry Smith 521447629fSBarry Smith /* recv and print off-processor's AO */ 531447629fSBarry Smith for (i=1; i<size; i++) { 541447629fSBarry Smith len = map->range[i+1] - map->range[i]; 551447629fSBarry Smith app_loc = app + map->range[i]; 561447629fSBarry Smith petsc_loc = petsc+ map->range[i]; 5755b25c41SPierre Jolivet ierr = MPI_Recv(app_loc,(PetscMPIInt)len,MPIU_INT,i,tag_app,PetscObjectComm((PetscObject)ao),&status);CHKERRMPI(ierr); 5855b25c41SPierre Jolivet ierr = MPI_Recv(petsc_loc,(PetscMPIInt)len,MPIU_INT,i,tag_petsc,PetscObjectComm((PetscObject)ao),&status);CHKERRMPI(ierr); 592abc8c78SJacob Faibussowitsch ierr = PetscViewerASCIIPrintf(viewer,"Process [%" PetscInt_FMT "]\n",i);CHKERRQ(ierr); 601447629fSBarry Smith for (j=0; j<len; j++) { 612abc8c78SJacob Faibussowitsch ierr = PetscViewerASCIIPrintf(viewer,"%3" PetscInt_FMT " %3" PetscInt_FMT " %3" PetscInt_FMT " %3" PetscInt_FMT "\n",map->range[i]+j,app_loc[j],map->range[i]+j,petsc_loc[j]);CHKERRQ(ierr); 621447629fSBarry Smith } 631447629fSBarry Smith } 641447629fSBarry Smith ierr = PetscFree2(app,petsc);CHKERRQ(ierr); 651447629fSBarry Smith 661447629fSBarry Smith } else { 671447629fSBarry Smith /* send values */ 68ffc4695bSBarry Smith ierr = MPI_Send((void*)aomems->app_loc,map->n,MPIU_INT,0,tag_app,PetscObjectComm((PetscObject)ao));CHKERRMPI(ierr); 69ffc4695bSBarry Smith ierr = MPI_Send((void*)aomems->petsc_loc,map->n,MPIU_INT,0,tag_petsc,PetscObjectComm((PetscObject)ao));CHKERRMPI(ierr); 701447629fSBarry Smith } 711447629fSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 721447629fSBarry Smith PetscFunctionReturn(0); 731447629fSBarry Smith } 741447629fSBarry Smith 751447629fSBarry Smith PetscErrorCode AODestroy_MemoryScalable(AO ao) 761447629fSBarry Smith { 771447629fSBarry Smith AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data; 781447629fSBarry Smith PetscErrorCode ierr; 791447629fSBarry Smith 801447629fSBarry Smith PetscFunctionBegin; 811447629fSBarry Smith ierr = PetscFree2(aomems->app_loc,aomems->petsc_loc);CHKERRQ(ierr); 821447629fSBarry Smith ierr = PetscLayoutDestroy(&aomems->map);CHKERRQ(ierr); 831447629fSBarry Smith ierr = PetscFree(aomems);CHKERRQ(ierr); 841447629fSBarry Smith PetscFunctionReturn(0); 851447629fSBarry Smith } 861447629fSBarry Smith 871447629fSBarry Smith /* 881447629fSBarry Smith Input Parameters: 891447629fSBarry Smith + ao - the application ordering context 901447629fSBarry Smith . n - the number of integers in ia[] 911447629fSBarry Smith . ia - the integers; these are replaced with their mapped value 921447629fSBarry Smith - maploc - app_loc or petsc_loc in struct "AO_MemoryScalable" 931447629fSBarry Smith 941447629fSBarry Smith Output Parameter: 951447629fSBarry Smith . ia - the mapped interges 961447629fSBarry Smith */ 976bd6ae52SBarry Smith PetscErrorCode AOMap_MemoryScalable_private(AO ao,PetscInt n,PetscInt *ia,const PetscInt *maploc) 981447629fSBarry Smith { 991447629fSBarry Smith PetscErrorCode ierr; 1001447629fSBarry Smith AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data; 1011447629fSBarry Smith MPI_Comm comm; 1021447629fSBarry Smith PetscMPIInt rank,size,tag1,tag2; 10376ec1555SBarry Smith PetscInt *owner,*start,*sizes,nsends,nreceives; 1041447629fSBarry Smith PetscInt nmax,count,*sindices,*rindices,i,j,idx,lastidx,*sindices2,*rindices2; 1056bd6ae52SBarry Smith const PetscInt *owners = aomems->map->range; 1061447629fSBarry Smith MPI_Request *send_waits,*recv_waits,*send_waits2,*recv_waits2; 1071447629fSBarry Smith MPI_Status recv_status; 1081447629fSBarry Smith PetscMPIInt nindices,source,widx; 1091447629fSBarry Smith PetscInt *rbuf,*sbuf; 1101447629fSBarry Smith MPI_Status *send_status,*send_status2; 1111447629fSBarry Smith 1121447629fSBarry Smith PetscFunctionBegin; 1131447629fSBarry Smith ierr = PetscObjectGetComm((PetscObject)ao,&comm);CHKERRQ(ierr); 114ffc4695bSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 115ffc4695bSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 1161447629fSBarry Smith 1171447629fSBarry Smith /* first count number of contributors to each processor */ 118071fcb05SBarry Smith ierr = PetscMalloc1(size,&start);CHKERRQ(ierr); 119071fcb05SBarry Smith ierr = PetscCalloc2(2*size,&sizes,n,&owner);CHKERRQ(ierr); 1201447629fSBarry Smith 1211447629fSBarry Smith j = 0; 1221447629fSBarry Smith lastidx = -1; 1231447629fSBarry Smith for (i=0; i<n; i++) { 1246bd6ae52SBarry Smith if (ia[i] < 0) owner[i] = -1; /* mark negative entries (which are not to be mapped) with a special negative value */ 1256bd6ae52SBarry Smith if (ia[i] >= ao->N) owner[i] = -2; /* mark out of range entries with special negative value */ 1266bd6ae52SBarry Smith else { 1271447629fSBarry Smith /* if indices are NOT locally sorted, need to start search at the beginning */ 1281447629fSBarry Smith if (lastidx > (idx = ia[i])) j = 0; 1291447629fSBarry Smith lastidx = idx; 1301447629fSBarry Smith for (; j<size; j++) { 1311447629fSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 13276ec1555SBarry Smith sizes[2*j]++; /* num of indices to be sent */ 13376ec1555SBarry Smith sizes[2*j+1] = 1; /* send to proc[j] */ 1341447629fSBarry Smith owner[i] = j; 1351447629fSBarry Smith break; 1361447629fSBarry Smith } 1371447629fSBarry Smith } 1381447629fSBarry Smith } 1396bd6ae52SBarry Smith } 14076ec1555SBarry Smith sizes[2*rank]=sizes[2*rank+1]=0; /* do not receive from self! */ 1411447629fSBarry Smith nsends = 0; 14276ec1555SBarry Smith for (i=0; i<size; i++) nsends += sizes[2*i+1]; 1431447629fSBarry Smith 1441447629fSBarry Smith /* inform other processors of number of messages and max length*/ 14576ec1555SBarry Smith ierr = PetscMaxSum(comm,sizes,&nmax,&nreceives);CHKERRQ(ierr); 1461447629fSBarry Smith 1471447629fSBarry Smith /* allocate arrays */ 1481447629fSBarry Smith ierr = PetscObjectGetNewTag((PetscObject)ao,&tag1);CHKERRQ(ierr); 1491447629fSBarry Smith ierr = PetscObjectGetNewTag((PetscObject)ao,&tag2);CHKERRQ(ierr); 1501447629fSBarry Smith 151dcca6d9dSJed Brown ierr = PetscMalloc2(nreceives*nmax,&rindices,nreceives,&recv_waits);CHKERRQ(ierr); 152dcca6d9dSJed Brown ierr = PetscMalloc2(nsends*nmax,&rindices2,nsends,&recv_waits2);CHKERRQ(ierr); 1531447629fSBarry Smith 154dcca6d9dSJed Brown ierr = PetscMalloc3(n,&sindices,nsends,&send_waits,nsends,&send_status);CHKERRQ(ierr); 155dcca6d9dSJed Brown ierr = PetscMalloc3(n,&sindices2,nreceives,&send_waits2,nreceives,&send_status2);CHKERRQ(ierr); 1561447629fSBarry Smith 1571447629fSBarry Smith /* post 1st receives: receive others requests 1581447629fSBarry Smith since we don't know how long each individual message is we 1591447629fSBarry Smith allocate the largest needed buffer for each receive. Potentially 1601447629fSBarry Smith this is a lot of wasted space. 1611447629fSBarry Smith */ 1621447629fSBarry Smith for (i=0,count=0; i<nreceives; i++) { 163ffc4695bSBarry Smith ierr = MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);CHKERRMPI(ierr); 1641447629fSBarry Smith } 1651447629fSBarry Smith 1661447629fSBarry Smith /* do 1st sends: 1671447629fSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 1681447629fSBarry Smith the ith processor 1691447629fSBarry Smith */ 1701447629fSBarry Smith start[0] = 0; 17176ec1555SBarry Smith for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2]; 1721447629fSBarry Smith for (i=0; i<n; i++) { 1731447629fSBarry Smith j = owner[i]; 1746bd6ae52SBarry Smith if (j == -1) continue; /* do not remap negative entries in ia[] */ 1756bd6ae52SBarry Smith else if (j == -2) { /* out of range entries get mapped to -1 */ 1766bd6ae52SBarry Smith ia[i] = -1; 1776bd6ae52SBarry Smith continue; 1786bd6ae52SBarry Smith } else if (j != rank) { 1791447629fSBarry Smith sindices[start[j]++] = ia[i]; 1801447629fSBarry Smith } else { /* compute my own map */ 1811447629fSBarry Smith ia[i] = maploc[ia[i]-owners[rank]]; 1821447629fSBarry Smith } 1831447629fSBarry Smith } 1841447629fSBarry Smith 1851447629fSBarry Smith start[0] = 0; 18676ec1555SBarry Smith for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2]; 1871447629fSBarry Smith for (i=0,count=0; i<size; i++) { 18876ec1555SBarry Smith if (sizes[2*i+1]) { 1891447629fSBarry Smith /* send my request to others */ 190ffc4695bSBarry Smith ierr = MPI_Isend(sindices+start[i],sizes[2*i],MPIU_INT,i,tag1,comm,send_waits+count);CHKERRMPI(ierr); 1911447629fSBarry Smith /* post receive for the answer of my request */ 192ffc4695bSBarry Smith ierr = MPI_Irecv(sindices2+start[i],sizes[2*i],MPIU_INT,i,tag2,comm,recv_waits2+count);CHKERRMPI(ierr); 1931447629fSBarry Smith count++; 1941447629fSBarry Smith } 1951447629fSBarry Smith } 196*2c71b3e2SJacob Faibussowitsch PetscCheckFalse(nsends != count,comm,PETSC_ERR_SUP,"nsends %" PetscInt_FMT " != count %" PetscInt_FMT,nsends,count); 1971447629fSBarry Smith 1981447629fSBarry Smith /* wait on 1st sends */ 1991447629fSBarry Smith if (nsends) { 200ffc4695bSBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRMPI(ierr); 2011447629fSBarry Smith } 2021447629fSBarry Smith 2031447629fSBarry Smith /* 1st recvs: other's requests */ 2041447629fSBarry Smith for (j=0; j< nreceives; j++) { 20555b25c41SPierre Jolivet ierr = MPI_Waitany(nreceives,recv_waits,&widx,&recv_status);CHKERRMPI(ierr); /* idx: index of handle for operation that completed */ 20655b25c41SPierre Jolivet ierr = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRMPI(ierr); 2071447629fSBarry Smith rbuf = rindices+nmax*widx; /* global index */ 2081447629fSBarry Smith source = recv_status.MPI_SOURCE; 2091447629fSBarry Smith 2101447629fSBarry Smith /* compute mapping */ 2111447629fSBarry Smith sbuf = rbuf; 2121447629fSBarry Smith for (i=0; i<nindices; i++) sbuf[i] = maploc[rbuf[i]-owners[rank]]; 2131447629fSBarry Smith 2141447629fSBarry Smith /* send mapping back to the sender */ 215ffc4695bSBarry Smith ierr = MPI_Isend(sbuf,nindices,MPIU_INT,source,tag2,comm,send_waits2+widx);CHKERRMPI(ierr); 2161447629fSBarry Smith } 2171447629fSBarry Smith 2181447629fSBarry Smith /* wait on 2nd sends */ 2191447629fSBarry Smith if (nreceives) { 220ffc4695bSBarry Smith ierr = MPI_Waitall(nreceives,send_waits2,send_status2);CHKERRMPI(ierr); 2211447629fSBarry Smith } 2221447629fSBarry Smith 2231447629fSBarry Smith /* 2nd recvs: for the answer of my request */ 2241447629fSBarry Smith for (j=0; j< nsends; j++) { 22555b25c41SPierre Jolivet ierr = MPI_Waitany(nsends,recv_waits2,&widx,&recv_status);CHKERRMPI(ierr); 22655b25c41SPierre Jolivet ierr = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRMPI(ierr); 2271447629fSBarry Smith source = recv_status.MPI_SOURCE; 2281447629fSBarry Smith /* pack output ia[] */ 2291447629fSBarry Smith rbuf = sindices2+start[source]; 2301447629fSBarry Smith count = 0; 2311447629fSBarry Smith for (i=0; i<n; i++) { 2321447629fSBarry Smith if (source == owner[i]) ia[i] = rbuf[count++]; 2331447629fSBarry Smith } 2341447629fSBarry Smith } 2351447629fSBarry Smith 2361447629fSBarry Smith /* free arrays */ 237071fcb05SBarry Smith ierr = PetscFree(start);CHKERRQ(ierr); 238071fcb05SBarry Smith ierr = PetscFree2(sizes,owner);CHKERRQ(ierr); 2391447629fSBarry Smith ierr = PetscFree2(rindices,recv_waits);CHKERRQ(ierr); 2401447629fSBarry Smith ierr = PetscFree2(rindices2,recv_waits2);CHKERRQ(ierr); 2411447629fSBarry Smith ierr = PetscFree3(sindices,send_waits,send_status);CHKERRQ(ierr); 2421447629fSBarry Smith ierr = PetscFree3(sindices2,send_waits2,send_status2);CHKERRQ(ierr); 2431447629fSBarry Smith PetscFunctionReturn(0); 2441447629fSBarry Smith } 2451447629fSBarry Smith 2461447629fSBarry Smith PetscErrorCode AOPetscToApplication_MemoryScalable(AO ao,PetscInt n,PetscInt *ia) 2471447629fSBarry Smith { 2481447629fSBarry Smith PetscErrorCode ierr; 2491447629fSBarry Smith AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data; 2501447629fSBarry Smith PetscInt *app_loc = aomems->app_loc; 2511447629fSBarry Smith 2521447629fSBarry Smith PetscFunctionBegin; 2531447629fSBarry Smith ierr = AOMap_MemoryScalable_private(ao,n,ia,app_loc);CHKERRQ(ierr); 2541447629fSBarry Smith PetscFunctionReturn(0); 2551447629fSBarry Smith } 2561447629fSBarry Smith 2571447629fSBarry Smith PetscErrorCode AOApplicationToPetsc_MemoryScalable(AO ao,PetscInt n,PetscInt *ia) 2581447629fSBarry Smith { 2591447629fSBarry Smith PetscErrorCode ierr; 2601447629fSBarry Smith AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data; 2611447629fSBarry Smith PetscInt *petsc_loc = aomems->petsc_loc; 2621447629fSBarry Smith 2631447629fSBarry Smith PetscFunctionBegin; 2641447629fSBarry Smith ierr = AOMap_MemoryScalable_private(ao,n,ia,petsc_loc);CHKERRQ(ierr); 2651447629fSBarry Smith PetscFunctionReturn(0); 2661447629fSBarry Smith } 2671447629fSBarry Smith 2681447629fSBarry Smith static struct _AOOps AOOps_MemoryScalable = { 269267267bdSJacob Faibussowitsch PetscDesignatedInitializer(view,AOView_MemoryScalable), 270267267bdSJacob Faibussowitsch PetscDesignatedInitializer(destroy,AODestroy_MemoryScalable), 271267267bdSJacob Faibussowitsch PetscDesignatedInitializer(petsctoapplication,AOPetscToApplication_MemoryScalable), 272267267bdSJacob Faibussowitsch PetscDesignatedInitializer(applicationtopetsc,AOApplicationToPetsc_MemoryScalable), 2731447629fSBarry Smith }; 2741447629fSBarry Smith 2751447629fSBarry Smith PetscErrorCode AOCreateMemoryScalable_private(MPI_Comm comm,PetscInt napp,const PetscInt from_array[],const PetscInt to_array[],AO ao, PetscInt *aomap_loc) 2761447629fSBarry Smith { 2771447629fSBarry Smith PetscErrorCode ierr; 2781447629fSBarry Smith AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data; 2791447629fSBarry Smith PetscLayout map = aomems->map; 2801447629fSBarry Smith PetscInt n_local = map->n,i,j; 2811447629fSBarry Smith PetscMPIInt rank,size,tag; 28276ec1555SBarry Smith PetscInt *owner,*start,*sizes,nsends,nreceives; 2831447629fSBarry Smith PetscInt nmax,count,*sindices,*rindices,idx,lastidx; 2841447629fSBarry Smith PetscInt *owners = aomems->map->range; 2851447629fSBarry Smith MPI_Request *send_waits,*recv_waits; 2861447629fSBarry Smith MPI_Status recv_status; 2871447629fSBarry Smith PetscMPIInt nindices,widx; 2881447629fSBarry Smith PetscInt *rbuf; 2891447629fSBarry Smith PetscInt n=napp,ip,ia; 2901447629fSBarry Smith MPI_Status *send_status; 2911447629fSBarry Smith 2921447629fSBarry Smith PetscFunctionBegin; 293580bdb30SBarry Smith ierr = PetscArrayzero(aomap_loc,n_local);CHKERRQ(ierr); 2941447629fSBarry Smith 295ffc4695bSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 296ffc4695bSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 2971447629fSBarry Smith 2981447629fSBarry Smith /* first count number of contributors (of from_array[]) to each processor */ 299f628708eSJed Brown ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr); 300f628708eSJed Brown ierr = PetscMalloc1(n,&owner);CHKERRQ(ierr); 3011447629fSBarry Smith 3021447629fSBarry Smith j = 0; 3031447629fSBarry Smith lastidx = -1; 3041447629fSBarry Smith for (i=0; i<n; i++) { 3051447629fSBarry Smith /* if indices are NOT locally sorted, need to start search at the beginning */ 3061447629fSBarry Smith if (lastidx > (idx = from_array[i])) j = 0; 3071447629fSBarry Smith lastidx = idx; 3081447629fSBarry Smith for (; j<size; j++) { 3091447629fSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 31076ec1555SBarry Smith sizes[2*j] += 2; /* num of indices to be sent - in pairs (ip,ia) */ 31176ec1555SBarry Smith sizes[2*j+1] = 1; /* send to proc[j] */ 3121447629fSBarry Smith owner[i] = j; 3131447629fSBarry Smith break; 3141447629fSBarry Smith } 3151447629fSBarry Smith } 3161447629fSBarry Smith } 31776ec1555SBarry Smith sizes[2*rank]=sizes[2*rank+1]=0; /* do not receive from self! */ 3181447629fSBarry Smith nsends = 0; 31976ec1555SBarry Smith for (i=0; i<size; i++) nsends += sizes[2*i+1]; 3201447629fSBarry Smith 3211447629fSBarry Smith /* inform other processors of number of messages and max length*/ 32276ec1555SBarry Smith ierr = PetscMaxSum(comm,sizes,&nmax,&nreceives);CHKERRQ(ierr); 3231447629fSBarry Smith 3241447629fSBarry Smith /* allocate arrays */ 3251447629fSBarry Smith ierr = PetscObjectGetNewTag((PetscObject)ao,&tag);CHKERRQ(ierr); 326dcca6d9dSJed Brown ierr = PetscMalloc2(nreceives*nmax,&rindices,nreceives,&recv_waits);CHKERRQ(ierr); 327dcca6d9dSJed Brown ierr = PetscMalloc3(2*n,&sindices,nsends,&send_waits,nsends,&send_status);CHKERRQ(ierr); 328785e854fSJed Brown ierr = PetscMalloc1(size,&start);CHKERRQ(ierr); 3291447629fSBarry Smith 3301447629fSBarry Smith /* post receives: */ 3311447629fSBarry Smith for (i=0; i<nreceives; i++) { 332ffc4695bSBarry Smith ierr = MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRMPI(ierr); 3331447629fSBarry Smith } 3341447629fSBarry Smith 3351447629fSBarry Smith /* do sends: 3361447629fSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 3371447629fSBarry Smith the ith processor 3381447629fSBarry Smith */ 3391447629fSBarry Smith start[0] = 0; 34076ec1555SBarry Smith for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2]; 3411447629fSBarry Smith for (i=0; i<n; i++) { 3421447629fSBarry Smith j = owner[i]; 3431447629fSBarry Smith if (j != rank) { 3441447629fSBarry Smith ip = from_array[i]; 3451447629fSBarry Smith ia = to_array[i]; 3461447629fSBarry Smith sindices[start[j]++] = ip; 3471447629fSBarry Smith sindices[start[j]++] = ia; 3481447629fSBarry Smith } else { /* compute my own map */ 3491447629fSBarry Smith ip = from_array[i] - owners[rank]; 3501447629fSBarry Smith ia = to_array[i]; 3511447629fSBarry Smith aomap_loc[ip] = ia; 3521447629fSBarry Smith } 3531447629fSBarry Smith } 3541447629fSBarry Smith 3551447629fSBarry Smith start[0] = 0; 35676ec1555SBarry Smith for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2]; 3571447629fSBarry Smith for (i=0,count=0; i<size; i++) { 35876ec1555SBarry Smith if (sizes[2*i+1]) { 359ffc4695bSBarry Smith ierr = MPI_Isend(sindices+start[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count);CHKERRMPI(ierr); 3601447629fSBarry Smith count++; 3611447629fSBarry Smith } 3621447629fSBarry Smith } 363*2c71b3e2SJacob Faibussowitsch PetscCheckFalse(nsends != count,comm,PETSC_ERR_SUP,"nsends %" PetscInt_FMT " != count %" PetscInt_FMT,nsends,count); 3641447629fSBarry Smith 3651447629fSBarry Smith /* wait on sends */ 3661447629fSBarry Smith if (nsends) { 367ffc4695bSBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRMPI(ierr); 3681447629fSBarry Smith } 3691447629fSBarry Smith 3701447629fSBarry Smith /* recvs */ 3711447629fSBarry Smith count=0; 3721447629fSBarry Smith for (j= nreceives; j>0; j--) { 373ffc4695bSBarry Smith ierr = MPI_Waitany(nreceives,recv_waits,&widx,&recv_status);CHKERRMPI(ierr); 374ffc4695bSBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRMPI(ierr); 3751447629fSBarry Smith rbuf = rindices+nmax*widx; /* global index */ 3761447629fSBarry Smith 3771447629fSBarry Smith /* compute local mapping */ 3781447629fSBarry Smith for (i=0; i<nindices; i+=2) { /* pack aomap_loc */ 3791447629fSBarry Smith ip = rbuf[i] - owners[rank]; /* local index */ 3801447629fSBarry Smith ia = rbuf[i+1]; 3811447629fSBarry Smith aomap_loc[ip] = ia; 3821447629fSBarry Smith } 3831447629fSBarry Smith count++; 3841447629fSBarry Smith } 3851447629fSBarry Smith 3861447629fSBarry Smith ierr = PetscFree(start);CHKERRQ(ierr); 3871447629fSBarry Smith ierr = PetscFree3(sindices,send_waits,send_status);CHKERRQ(ierr); 3881447629fSBarry Smith ierr = PetscFree2(rindices,recv_waits);CHKERRQ(ierr); 3891447629fSBarry Smith ierr = PetscFree(owner);CHKERRQ(ierr); 39076ec1555SBarry Smith ierr = PetscFree(sizes);CHKERRQ(ierr); 3911447629fSBarry Smith PetscFunctionReturn(0); 3921447629fSBarry Smith } 3931447629fSBarry Smith 3948cc058d9SJed Brown PETSC_EXTERN PetscErrorCode AOCreate_MemoryScalable(AO ao) 3951447629fSBarry Smith { 3961447629fSBarry Smith PetscErrorCode ierr; 3971447629fSBarry Smith IS isapp=ao->isapp,ispetsc=ao->ispetsc; 3981447629fSBarry Smith const PetscInt *mypetsc,*myapp; 3991447629fSBarry Smith PetscInt napp,n_local,N,i,start,*petsc,*lens,*disp; 4001447629fSBarry Smith MPI_Comm comm; 4011447629fSBarry Smith AO_MemoryScalable *aomems; 4021447629fSBarry Smith PetscLayout map; 4031447629fSBarry Smith PetscMPIInt size,rank; 4041447629fSBarry Smith 4051447629fSBarry Smith PetscFunctionBegin; 406*2c71b3e2SJacob Faibussowitsch PetscCheckFalse(!isapp,PetscObjectComm((PetscObject)ao),PETSC_ERR_ARG_WRONGSTATE,"AOSetIS() must be called before AOSetType()"); 4071447629fSBarry Smith /* create special struct aomems */ 408b00a9115SJed Brown ierr = PetscNewLog(ao,&aomems);CHKERRQ(ierr); 4091447629fSBarry Smith ao->data = (void*) aomems; 4101447629fSBarry Smith ierr = PetscMemcpy(ao->ops,&AOOps_MemoryScalable,sizeof(struct _AOOps));CHKERRQ(ierr); 4111447629fSBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)ao,AOMEMORYSCALABLE);CHKERRQ(ierr); 4121447629fSBarry Smith 4131447629fSBarry Smith /* transmit all local lengths of isapp to all processors */ 4141447629fSBarry Smith ierr = PetscObjectGetComm((PetscObject)isapp,&comm);CHKERRQ(ierr); 415ffc4695bSBarry Smith ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr); 416ffc4695bSBarry Smith ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr); 417dcca6d9dSJed Brown ierr = PetscMalloc2(size,&lens,size,&disp);CHKERRQ(ierr); 4181447629fSBarry Smith ierr = ISGetLocalSize(isapp,&napp);CHKERRQ(ierr); 419ffc4695bSBarry Smith ierr = MPI_Allgather(&napp, 1, MPIU_INT, lens, 1, MPIU_INT, comm);CHKERRMPI(ierr); 4201447629fSBarry Smith 4211447629fSBarry Smith N = 0; 4221447629fSBarry Smith for (i = 0; i < size; i++) { 4231447629fSBarry Smith disp[i] = N; 4241447629fSBarry Smith N += lens[i]; 4251447629fSBarry Smith } 4261447629fSBarry Smith 4271447629fSBarry Smith /* If ispetsc is 0 then use "natural" numbering */ 4281447629fSBarry Smith if (napp) { 4291447629fSBarry Smith if (!ispetsc) { 4301447629fSBarry Smith start = disp[rank]; 431854ce69bSBarry Smith ierr = PetscMalloc1(napp+1, &petsc);CHKERRQ(ierr); 4321447629fSBarry Smith for (i=0; i<napp; i++) petsc[i] = start + i; 4331447629fSBarry Smith } else { 4341447629fSBarry Smith ierr = ISGetIndices(ispetsc,&mypetsc);CHKERRQ(ierr); 4351447629fSBarry Smith petsc = (PetscInt*)mypetsc; 4361447629fSBarry Smith } 4374a2f8832SBarry Smith } else { 4384a2f8832SBarry Smith petsc = NULL; 4391447629fSBarry Smith } 4401447629fSBarry Smith 4411447629fSBarry Smith /* create a map with global size N - used to determine the local sizes of ao - shall we use local napp instead of N? */ 4421447629fSBarry Smith ierr = PetscLayoutCreate(comm,&map);CHKERRQ(ierr); 4431447629fSBarry Smith map->bs = 1; 4441447629fSBarry Smith map->N = N; 4451447629fSBarry Smith ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4461447629fSBarry Smith 4471447629fSBarry Smith ao->N = N; 4481447629fSBarry Smith ao->n = map->n; 4491447629fSBarry Smith aomems->map = map; 4501447629fSBarry Smith 4511447629fSBarry Smith /* create distributed indices app_loc: petsc->app and petsc_loc: app->petsc */ 4521447629fSBarry Smith n_local = map->n; 453580bdb30SBarry Smith ierr = PetscCalloc2(n_local, &aomems->app_loc,n_local,&aomems->petsc_loc);CHKERRQ(ierr); 4543bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)ao,2*n_local*sizeof(PetscInt));CHKERRQ(ierr); 4551447629fSBarry Smith ierr = ISGetIndices(isapp,&myapp);CHKERRQ(ierr); 4561447629fSBarry Smith 4571447629fSBarry Smith ierr = AOCreateMemoryScalable_private(comm,napp,petsc,myapp,ao,aomems->app_loc);CHKERRQ(ierr); 4581447629fSBarry Smith ierr = AOCreateMemoryScalable_private(comm,napp,myapp,petsc,ao,aomems->petsc_loc);CHKERRQ(ierr); 4591447629fSBarry Smith 4601447629fSBarry Smith ierr = ISRestoreIndices(isapp,&myapp);CHKERRQ(ierr); 4611447629fSBarry Smith if (napp) { 4621447629fSBarry Smith if (ispetsc) { 4631447629fSBarry Smith ierr = ISRestoreIndices(ispetsc,&mypetsc);CHKERRQ(ierr); 4641447629fSBarry Smith } else { 4651447629fSBarry Smith ierr = PetscFree(petsc);CHKERRQ(ierr); 4661447629fSBarry Smith } 4671447629fSBarry Smith } 4681447629fSBarry Smith ierr = PetscFree2(lens,disp);CHKERRQ(ierr); 4691447629fSBarry Smith PetscFunctionReturn(0); 4701447629fSBarry Smith } 4711447629fSBarry Smith 4721447629fSBarry Smith /*@C 4731447629fSBarry Smith AOCreateMemoryScalable - Creates a memory scalable application ordering using two integer arrays. 4741447629fSBarry Smith 475d083f849SBarry Smith Collective 4761447629fSBarry Smith 4771447629fSBarry Smith Input Parameters: 4781447629fSBarry Smith + comm - MPI communicator that is to share AO 4791447629fSBarry Smith . napp - size of integer arrays 4801447629fSBarry Smith . myapp - integer array that defines an ordering 4811447629fSBarry Smith - mypetsc - integer array that defines another ordering (may be NULL to 4821447629fSBarry Smith indicate the natural ordering, that is 0,1,2,3,...) 4831447629fSBarry Smith 4841447629fSBarry Smith Output Parameter: 4851447629fSBarry Smith . aoout - the new application ordering 4861447629fSBarry Smith 4871447629fSBarry Smith Level: beginner 4881447629fSBarry Smith 48995452b02SPatrick Sanan Notes: 49095452b02SPatrick Sanan The arrays myapp and mypetsc must contain the all the integers 0 to napp-1 with no duplicates; that is there cannot be any "holes" 4911447629fSBarry Smith in the indices. Use AOCreateMapping() or AOCreateMappingIS() if you wish to have "holes" in the indices. 4921447629fSBarry Smith Comparing with AOCreateBasic(), this routine trades memory with message communication. 4931447629fSBarry Smith 4941447629fSBarry Smith .seealso: AOCreateMemoryScalableIS(), AODestroy(), AOPetscToApplication(), AOApplicationToPetsc() 4951447629fSBarry Smith @*/ 4961447629fSBarry Smith PetscErrorCode AOCreateMemoryScalable(MPI_Comm comm,PetscInt napp,const PetscInt myapp[],const PetscInt mypetsc[],AO *aoout) 4971447629fSBarry Smith { 4981447629fSBarry Smith PetscErrorCode ierr; 4991447629fSBarry Smith IS isapp,ispetsc; 5001447629fSBarry Smith const PetscInt *app=myapp,*petsc=mypetsc; 5011447629fSBarry Smith 5021447629fSBarry Smith PetscFunctionBegin; 5031447629fSBarry Smith ierr = ISCreateGeneral(comm,napp,app,PETSC_USE_POINTER,&isapp);CHKERRQ(ierr); 5041447629fSBarry Smith if (mypetsc) { 5051447629fSBarry Smith ierr = ISCreateGeneral(comm,napp,petsc,PETSC_USE_POINTER,&ispetsc);CHKERRQ(ierr); 5061447629fSBarry Smith } else { 5071447629fSBarry Smith ispetsc = NULL; 5081447629fSBarry Smith } 5091447629fSBarry Smith ierr = AOCreateMemoryScalableIS(isapp,ispetsc,aoout);CHKERRQ(ierr); 5101447629fSBarry Smith ierr = ISDestroy(&isapp);CHKERRQ(ierr); 5111447629fSBarry Smith if (mypetsc) { 5121447629fSBarry Smith ierr = ISDestroy(&ispetsc);CHKERRQ(ierr); 5131447629fSBarry Smith } 5141447629fSBarry Smith PetscFunctionReturn(0); 5151447629fSBarry Smith } 5161447629fSBarry Smith 5171447629fSBarry Smith /*@C 5181447629fSBarry Smith AOCreateMemoryScalableIS - Creates a memory scalable application ordering using two index sets. 5191447629fSBarry Smith 5201447629fSBarry Smith Collective on IS 5211447629fSBarry Smith 5221447629fSBarry Smith Input Parameters: 5231447629fSBarry Smith + isapp - index set that defines an ordering 5241447629fSBarry Smith - ispetsc - index set that defines another ordering (may be NULL to use the 5251447629fSBarry Smith natural ordering) 5261447629fSBarry Smith 5271447629fSBarry Smith Output Parameter: 5281447629fSBarry Smith . aoout - the new application ordering 5291447629fSBarry Smith 5301447629fSBarry Smith Level: beginner 5311447629fSBarry Smith 53295452b02SPatrick Sanan Notes: 53395452b02SPatrick Sanan The index sets isapp and ispetsc must contain the all the integers 0 to napp-1 (where napp is the length of the index sets) with no duplicates; 5341447629fSBarry Smith that is there cannot be any "holes". 5351447629fSBarry Smith Comparing with AOCreateBasicIS(), this routine trades memory with message communication. 5361447629fSBarry Smith .seealso: AOCreateMemoryScalable(), AODestroy() 5371447629fSBarry Smith @*/ 5381447629fSBarry Smith PetscErrorCode AOCreateMemoryScalableIS(IS isapp,IS ispetsc,AO *aoout) 5391447629fSBarry Smith { 5401447629fSBarry Smith PetscErrorCode ierr; 5411447629fSBarry Smith MPI_Comm comm; 5421447629fSBarry Smith AO ao; 5431447629fSBarry Smith 5441447629fSBarry Smith PetscFunctionBegin; 5451447629fSBarry Smith ierr = PetscObjectGetComm((PetscObject)isapp,&comm);CHKERRQ(ierr); 5461447629fSBarry Smith ierr = AOCreate(comm,&ao);CHKERRQ(ierr); 5471447629fSBarry Smith ierr = AOSetIS(ao,isapp,ispetsc);CHKERRQ(ierr); 5481447629fSBarry Smith ierr = AOSetType(ao,AOMEMORYSCALABLE);CHKERRQ(ierr); 549817ea411SJed Brown ierr = AOViewFromOptions(ao,NULL,"-ao_view");CHKERRQ(ierr); 5501447629fSBarry Smith *aoout = ao; 5511447629fSBarry Smith PetscFunctionReturn(0); 5521447629fSBarry Smith } 553