11447629fSBarry Smith 21447629fSBarry Smith /* 31447629fSBarry Smith The memory scalable AO application ordering routines. These store the 41447629fSBarry Smith local orderings on each processor. 51447629fSBarry Smith */ 61447629fSBarry Smith 71447629fSBarry Smith #include <../src/vec/is/ao/aoimpl.h> /*I "petscao.h" I*/ 81447629fSBarry Smith 91447629fSBarry Smith typedef struct { 101447629fSBarry Smith PetscInt *app_loc; /* app_loc[i] is the partner for the ith local PETSc slot */ 111447629fSBarry Smith PetscInt *petsc_loc; /* petsc_loc[j] is the partner for the jth local app slot */ 121447629fSBarry Smith PetscLayout map; /* determines the local sizes of ao */ 131447629fSBarry Smith } AO_MemoryScalable; 141447629fSBarry Smith 151447629fSBarry Smith /* 161447629fSBarry Smith All processors have the same data so processor 1 prints it 171447629fSBarry Smith */ 181447629fSBarry Smith #undef __FUNCT__ 191447629fSBarry Smith #define __FUNCT__ "AOView_MemoryScalable" 201447629fSBarry Smith PetscErrorCode AOView_MemoryScalable(AO ao,PetscViewer viewer) 211447629fSBarry Smith { 221447629fSBarry Smith PetscErrorCode ierr; 231447629fSBarry Smith PetscMPIInt rank,size; 241447629fSBarry Smith AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data; 251447629fSBarry Smith PetscBool iascii; 261447629fSBarry Smith PetscMPIInt tag_app,tag_petsc; 271447629fSBarry Smith PetscLayout map = aomems->map; 281447629fSBarry Smith PetscInt *app,*app_loc,*petsc,*petsc_loc,len,i,j; 291447629fSBarry Smith MPI_Status status; 301447629fSBarry Smith 311447629fSBarry Smith PetscFunctionBegin; 321447629fSBarry Smith ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 331447629fSBarry Smith if (!iascii) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not supported for AO MemoryScalable",((PetscObject)viewer)->type_name); 341447629fSBarry Smith 351447629fSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)ao),&rank);CHKERRQ(ierr); 361447629fSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)ao),&size);CHKERRQ(ierr); 371447629fSBarry Smith 381447629fSBarry Smith ierr = PetscObjectGetNewTag((PetscObject)ao,&tag_app);CHKERRQ(ierr); 391447629fSBarry Smith ierr = PetscObjectGetNewTag((PetscObject)ao,&tag_petsc);CHKERRQ(ierr); 401447629fSBarry Smith 411447629fSBarry Smith if (!rank) { 421447629fSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"Number of elements in ordering %D\n",ao->N);CHKERRQ(ierr); 431447629fSBarry Smith ierr = PetscViewerASCIIPrintf(viewer, "PETSc->App App->PETSc\n");CHKERRQ(ierr); 441447629fSBarry Smith 45dcca6d9dSJed Brown ierr = PetscMalloc2(map->N,&app,map->N,&petsc);CHKERRQ(ierr); 461447629fSBarry Smith len = map->n; 471447629fSBarry Smith /* print local AO */ 481447629fSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"Process [%D]\n",rank);CHKERRQ(ierr); 491447629fSBarry Smith for (i=0; i<len; i++) { 501447629fSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"%3D %3D %3D %3D\n",i,aomems->app_loc[i],i,aomems->petsc_loc[i]);CHKERRQ(ierr); 511447629fSBarry Smith } 521447629fSBarry Smith 531447629fSBarry Smith /* recv and print off-processor's AO */ 541447629fSBarry Smith for (i=1; i<size; i++) { 551447629fSBarry Smith len = map->range[i+1] - map->range[i]; 561447629fSBarry Smith app_loc = app + map->range[i]; 571447629fSBarry Smith petsc_loc = petsc+ map->range[i]; 581447629fSBarry Smith ierr = MPI_Recv(app_loc,(PetscMPIInt)len,MPIU_INT,i,tag_app,PetscObjectComm((PetscObject)ao),&status);CHKERRQ(ierr); 591447629fSBarry Smith ierr = MPI_Recv(petsc_loc,(PetscMPIInt)len,MPIU_INT,i,tag_petsc,PetscObjectComm((PetscObject)ao),&status);CHKERRQ(ierr); 601447629fSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"Process [%D]\n",i);CHKERRQ(ierr); 611447629fSBarry Smith for (j=0; j<len; j++) { 621447629fSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"%3D %3D %3D %3D\n",map->range[i]+j,app_loc[j],map->range[i]+j,petsc_loc[j]);CHKERRQ(ierr); 631447629fSBarry Smith } 641447629fSBarry Smith } 651447629fSBarry Smith ierr = PetscFree2(app,petsc);CHKERRQ(ierr); 661447629fSBarry Smith 671447629fSBarry Smith } else { 681447629fSBarry Smith /* send values */ 691447629fSBarry Smith ierr = MPI_Send((void*)aomems->app_loc,map->n,MPIU_INT,0,tag_app,PetscObjectComm((PetscObject)ao));CHKERRQ(ierr); 701447629fSBarry Smith ierr = MPI_Send((void*)aomems->petsc_loc,map->n,MPIU_INT,0,tag_petsc,PetscObjectComm((PetscObject)ao));CHKERRQ(ierr); 711447629fSBarry Smith } 721447629fSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 731447629fSBarry Smith PetscFunctionReturn(0); 741447629fSBarry Smith } 751447629fSBarry Smith 761447629fSBarry Smith #undef __FUNCT__ 771447629fSBarry Smith #define __FUNCT__ "AODestroy_MemoryScalable" 781447629fSBarry Smith PetscErrorCode AODestroy_MemoryScalable(AO ao) 791447629fSBarry Smith { 801447629fSBarry Smith AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data; 811447629fSBarry Smith PetscErrorCode ierr; 821447629fSBarry Smith 831447629fSBarry Smith PetscFunctionBegin; 841447629fSBarry Smith ierr = PetscFree2(aomems->app_loc,aomems->petsc_loc);CHKERRQ(ierr); 851447629fSBarry Smith ierr = PetscLayoutDestroy(&aomems->map);CHKERRQ(ierr); 861447629fSBarry Smith ierr = PetscFree(aomems);CHKERRQ(ierr); 871447629fSBarry Smith PetscFunctionReturn(0); 881447629fSBarry Smith } 891447629fSBarry Smith 901447629fSBarry Smith /* 911447629fSBarry Smith Input Parameters: 921447629fSBarry Smith + ao - the application ordering context 931447629fSBarry Smith . n - the number of integers in ia[] 941447629fSBarry Smith . ia - the integers; these are replaced with their mapped value 951447629fSBarry Smith - maploc - app_loc or petsc_loc in struct "AO_MemoryScalable" 961447629fSBarry Smith 971447629fSBarry Smith Output Parameter: 981447629fSBarry Smith . ia - the mapped interges 991447629fSBarry Smith */ 1001447629fSBarry Smith #undef __FUNCT__ 1011447629fSBarry Smith #define __FUNCT__ "AOMap_MemoryScalable_private" 1021447629fSBarry Smith PetscErrorCode AOMap_MemoryScalable_private(AO ao,PetscInt n,PetscInt *ia,PetscInt *maploc) 1031447629fSBarry Smith { 1041447629fSBarry Smith PetscErrorCode ierr; 1051447629fSBarry Smith AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data; 1061447629fSBarry Smith MPI_Comm comm; 1071447629fSBarry Smith PetscMPIInt rank,size,tag1,tag2; 10876ec1555SBarry Smith PetscInt *owner,*start,*sizes,nsends,nreceives; 1091447629fSBarry Smith PetscInt nmax,count,*sindices,*rindices,i,j,idx,lastidx,*sindices2,*rindices2; 1101447629fSBarry Smith PetscInt *owners = aomems->map->range; 1111447629fSBarry Smith MPI_Request *send_waits,*recv_waits,*send_waits2,*recv_waits2; 1121447629fSBarry Smith MPI_Status recv_status; 1131447629fSBarry Smith PetscMPIInt nindices,source,widx; 1141447629fSBarry Smith PetscInt *rbuf,*sbuf; 1151447629fSBarry Smith MPI_Status *send_status,*send_status2; 1161447629fSBarry Smith 1171447629fSBarry Smith PetscFunctionBegin; 1181447629fSBarry Smith ierr = PetscObjectGetComm((PetscObject)ao,&comm);CHKERRQ(ierr); 1191447629fSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1201447629fSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1211447629fSBarry Smith 1221447629fSBarry Smith /* first count number of contributors to each processor */ 123037dbc42SBarry Smith ierr = PetscMalloc2(2*size,&sizes,size,&start);CHKERRQ(ierr); 12476ec1555SBarry Smith ierr = PetscMemzero(sizes,2*size*sizeof(PetscInt));CHKERRQ(ierr); 125f628708eSJed Brown ierr = PetscCalloc1(n,&owner);CHKERRQ(ierr); 1261447629fSBarry Smith 1271447629fSBarry Smith j = 0; 1281447629fSBarry Smith lastidx = -1; 1291447629fSBarry Smith for (i=0; i<n; i++) { 1301447629fSBarry Smith /* if indices are NOT locally sorted, need to start search at the beginning */ 1311447629fSBarry Smith if (lastidx > (idx = ia[i])) j = 0; 1321447629fSBarry Smith lastidx = idx; 1331447629fSBarry Smith for (; j<size; j++) { 1341447629fSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 13576ec1555SBarry Smith sizes[2*j]++; /* num of indices to be sent */ 13676ec1555SBarry Smith sizes[2*j+1] = 1; /* send to proc[j] */ 1371447629fSBarry Smith owner[i] = j; 1381447629fSBarry Smith break; 1391447629fSBarry Smith } 1401447629fSBarry Smith } 1411447629fSBarry Smith } 14276ec1555SBarry Smith sizes[2*rank]=sizes[2*rank+1]=0; /* do not receive from self! */ 1431447629fSBarry Smith nsends = 0; 14476ec1555SBarry Smith for (i=0; i<size; i++) nsends += sizes[2*i+1]; 1451447629fSBarry Smith 1461447629fSBarry Smith /* inform other processors of number of messages and max length*/ 14776ec1555SBarry Smith ierr = PetscMaxSum(comm,sizes,&nmax,&nreceives);CHKERRQ(ierr); 1481447629fSBarry Smith 1491447629fSBarry Smith /* allocate arrays */ 1501447629fSBarry Smith ierr = PetscObjectGetNewTag((PetscObject)ao,&tag1);CHKERRQ(ierr); 1511447629fSBarry Smith ierr = PetscObjectGetNewTag((PetscObject)ao,&tag2);CHKERRQ(ierr); 1521447629fSBarry Smith 153dcca6d9dSJed Brown ierr = PetscMalloc2(nreceives*nmax,&rindices,nreceives,&recv_waits);CHKERRQ(ierr); 154dcca6d9dSJed Brown ierr = PetscMalloc2(nsends*nmax,&rindices2,nsends,&recv_waits2);CHKERRQ(ierr); 1551447629fSBarry Smith 156dcca6d9dSJed Brown ierr = PetscMalloc3(n,&sindices,nsends,&send_waits,nsends,&send_status);CHKERRQ(ierr); 157dcca6d9dSJed Brown ierr = PetscMalloc3(n,&sindices2,nreceives,&send_waits2,nreceives,&send_status2);CHKERRQ(ierr); 1581447629fSBarry Smith 1591447629fSBarry Smith /* post 1st receives: receive others requests 1601447629fSBarry Smith since we don't know how long each individual message is we 1611447629fSBarry Smith allocate the largest needed buffer for each receive. Potentially 1621447629fSBarry Smith this is a lot of wasted space. 1631447629fSBarry Smith */ 1641447629fSBarry Smith for (i=0,count=0; i<nreceives; i++) { 1651447629fSBarry Smith ierr = MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);CHKERRQ(ierr); 1661447629fSBarry Smith } 1671447629fSBarry Smith 1681447629fSBarry Smith /* do 1st sends: 1691447629fSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 1701447629fSBarry Smith the ith processor 1711447629fSBarry Smith */ 1721447629fSBarry Smith start[0] = 0; 17376ec1555SBarry Smith for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2]; 1741447629fSBarry Smith for (i=0; i<n; i++) { 1751447629fSBarry Smith j = owner[i]; 1761447629fSBarry Smith if (j != rank) { 1771447629fSBarry Smith sindices[start[j]++] = ia[i]; 1781447629fSBarry Smith } else { /* compute my own map */ 1791447629fSBarry Smith if (ia[i] >= owners[rank] && ia[i] < owners[rank+1]) { 1801447629fSBarry Smith ia[i] = maploc[ia[i]-owners[rank]]; 1811447629fSBarry Smith } else { 1821447629fSBarry Smith ia[i] = -1; /* ia[i] is not in the range of 0 and N-1, maps it to -1 */ 1831447629fSBarry Smith } 1841447629fSBarry Smith } 1851447629fSBarry Smith } 1861447629fSBarry Smith 1871447629fSBarry Smith start[0] = 0; 18876ec1555SBarry Smith for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2]; 1891447629fSBarry Smith for (i=0,count=0; i<size; i++) { 19076ec1555SBarry Smith if (sizes[2*i+1]) { 1911447629fSBarry Smith /* send my request to others */ 19276ec1555SBarry Smith ierr = MPI_Isend(sindices+start[i],sizes[2*i],MPIU_INT,i,tag1,comm,send_waits+count);CHKERRQ(ierr); 1931447629fSBarry Smith /* post receive for the answer of my request */ 19476ec1555SBarry Smith ierr = MPI_Irecv(sindices2+start[i],sizes[2*i],MPIU_INT,i,tag2,comm,recv_waits2+count);CHKERRQ(ierr); 1951447629fSBarry Smith count++; 1961447629fSBarry Smith } 1971447629fSBarry Smith } 1981447629fSBarry Smith if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %d != count %d",nsends,count); 1991447629fSBarry Smith 2001447629fSBarry Smith /* wait on 1st sends */ 2011447629fSBarry Smith if (nsends) { 2021447629fSBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 2031447629fSBarry Smith } 2041447629fSBarry Smith 2051447629fSBarry Smith /* 1st recvs: other's requests */ 2061447629fSBarry Smith for (j=0; j< nreceives; j++) { 2071447629fSBarry Smith ierr = MPI_Waitany(nreceives,recv_waits,&widx,&recv_status);CHKERRQ(ierr); /* idx: index of handle for operation that completed */ 2081447629fSBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRQ(ierr); 2091447629fSBarry Smith rbuf = rindices+nmax*widx; /* global index */ 2101447629fSBarry Smith source = recv_status.MPI_SOURCE; 2111447629fSBarry Smith 2121447629fSBarry Smith /* compute mapping */ 2131447629fSBarry Smith sbuf = rbuf; 2141447629fSBarry Smith for (i=0; i<nindices; i++) sbuf[i] = maploc[rbuf[i]-owners[rank]]; 2151447629fSBarry Smith 2161447629fSBarry Smith /* send mapping back to the sender */ 2171447629fSBarry Smith ierr = MPI_Isend(sbuf,nindices,MPIU_INT,source,tag2,comm,send_waits2+widx);CHKERRQ(ierr); 2181447629fSBarry Smith } 2191447629fSBarry Smith 2201447629fSBarry Smith /* wait on 2nd sends */ 2211447629fSBarry Smith if (nreceives) { 2221447629fSBarry Smith ierr = MPI_Waitall(nreceives,send_waits2,send_status2);CHKERRQ(ierr); 2231447629fSBarry Smith } 2241447629fSBarry Smith 2251447629fSBarry Smith /* 2nd recvs: for the answer of my request */ 2261447629fSBarry Smith for (j=0; j< nsends; j++) { 2271447629fSBarry Smith ierr = MPI_Waitany(nsends,recv_waits2,&widx,&recv_status);CHKERRQ(ierr); 2281447629fSBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRQ(ierr); 2291447629fSBarry Smith source = recv_status.MPI_SOURCE; 2301447629fSBarry Smith /* pack output ia[] */ 2311447629fSBarry Smith rbuf = sindices2+start[source]; 2321447629fSBarry Smith count = 0; 2331447629fSBarry Smith for (i=0; i<n; i++) { 2341447629fSBarry Smith if (source == owner[i]) ia[i] = rbuf[count++]; 2351447629fSBarry Smith } 2361447629fSBarry Smith } 2371447629fSBarry Smith 2381447629fSBarry Smith /* free arrays */ 23976ec1555SBarry Smith ierr = PetscFree2(sizes,start);CHKERRQ(ierr); 2401447629fSBarry Smith ierr = PetscFree(owner);CHKERRQ(ierr); 2411447629fSBarry Smith ierr = PetscFree2(rindices,recv_waits);CHKERRQ(ierr); 2421447629fSBarry Smith ierr = PetscFree2(rindices2,recv_waits2);CHKERRQ(ierr); 2431447629fSBarry Smith ierr = PetscFree3(sindices,send_waits,send_status);CHKERRQ(ierr); 2441447629fSBarry Smith ierr = PetscFree3(sindices2,send_waits2,send_status2);CHKERRQ(ierr); 2451447629fSBarry Smith PetscFunctionReturn(0); 2461447629fSBarry Smith } 2471447629fSBarry Smith 2481447629fSBarry Smith #undef __FUNCT__ 2491447629fSBarry Smith #define __FUNCT__ "AOPetscToApplication_MemoryScalable" 2501447629fSBarry Smith PetscErrorCode AOPetscToApplication_MemoryScalable(AO ao,PetscInt n,PetscInt *ia) 2511447629fSBarry Smith { 2521447629fSBarry Smith PetscErrorCode ierr; 2531447629fSBarry Smith AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data; 2541447629fSBarry Smith PetscInt *app_loc = aomems->app_loc; 2551447629fSBarry Smith 2561447629fSBarry Smith PetscFunctionBegin; 2571447629fSBarry Smith ierr = AOMap_MemoryScalable_private(ao,n,ia,app_loc);CHKERRQ(ierr); 2581447629fSBarry Smith PetscFunctionReturn(0); 2591447629fSBarry Smith } 2601447629fSBarry Smith 2611447629fSBarry Smith #undef __FUNCT__ 2621447629fSBarry Smith #define __FUNCT__ "AOApplicationToPetsc_MemoryScalable" 2631447629fSBarry Smith PetscErrorCode AOApplicationToPetsc_MemoryScalable(AO ao,PetscInt n,PetscInt *ia) 2641447629fSBarry Smith { 2651447629fSBarry Smith PetscErrorCode ierr; 2661447629fSBarry Smith AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data; 2671447629fSBarry Smith PetscInt *petsc_loc = aomems->petsc_loc; 2681447629fSBarry Smith 2691447629fSBarry Smith PetscFunctionBegin; 2701447629fSBarry Smith ierr = AOMap_MemoryScalable_private(ao,n,ia,petsc_loc);CHKERRQ(ierr); 2711447629fSBarry Smith PetscFunctionReturn(0); 2721447629fSBarry Smith } 2731447629fSBarry Smith 2741447629fSBarry Smith static struct _AOOps AOOps_MemoryScalable = { 2751447629fSBarry Smith AOView_MemoryScalable, 2761447629fSBarry Smith AODestroy_MemoryScalable, 2771447629fSBarry Smith AOPetscToApplication_MemoryScalable, 2781447629fSBarry Smith AOApplicationToPetsc_MemoryScalable, 2791447629fSBarry Smith 0, 2801447629fSBarry Smith 0, 2811447629fSBarry Smith 0, 2821447629fSBarry Smith 0 2831447629fSBarry Smith }; 2841447629fSBarry Smith 2851447629fSBarry Smith #undef __FUNCT__ 2861447629fSBarry Smith #define __FUNCT__ "AOCreateMemoryScalable_private" 2871447629fSBarry Smith PetscErrorCode AOCreateMemoryScalable_private(MPI_Comm comm,PetscInt napp,const PetscInt from_array[],const PetscInt to_array[],AO ao, PetscInt *aomap_loc) 2881447629fSBarry Smith { 2891447629fSBarry Smith PetscErrorCode ierr; 2901447629fSBarry Smith AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data; 2911447629fSBarry Smith PetscLayout map = aomems->map; 2921447629fSBarry Smith PetscInt n_local = map->n,i,j; 2931447629fSBarry Smith PetscMPIInt rank,size,tag; 29476ec1555SBarry Smith PetscInt *owner,*start,*sizes,nsends,nreceives; 2951447629fSBarry Smith PetscInt nmax,count,*sindices,*rindices,idx,lastidx; 2961447629fSBarry Smith PetscInt *owners = aomems->map->range; 2971447629fSBarry Smith MPI_Request *send_waits,*recv_waits; 2981447629fSBarry Smith MPI_Status recv_status; 2991447629fSBarry Smith PetscMPIInt nindices,widx; 3001447629fSBarry Smith PetscInt *rbuf; 3011447629fSBarry Smith PetscInt n=napp,ip,ia; 3021447629fSBarry Smith MPI_Status *send_status; 3031447629fSBarry Smith 3041447629fSBarry Smith PetscFunctionBegin; 3051447629fSBarry Smith ierr = PetscMemzero(aomap_loc,n_local*sizeof(PetscInt));CHKERRQ(ierr); 3061447629fSBarry Smith 3071447629fSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3081447629fSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3091447629fSBarry Smith 3101447629fSBarry Smith /* first count number of contributors (of from_array[]) to each processor */ 311f628708eSJed Brown ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr); 312f628708eSJed Brown ierr = PetscMalloc1(n,&owner);CHKERRQ(ierr); 3131447629fSBarry Smith 3141447629fSBarry Smith j = 0; 3151447629fSBarry Smith lastidx = -1; 3161447629fSBarry Smith for (i=0; i<n; i++) { 3171447629fSBarry Smith /* if indices are NOT locally sorted, need to start search at the beginning */ 3181447629fSBarry Smith if (lastidx > (idx = from_array[i])) j = 0; 3191447629fSBarry Smith lastidx = idx; 3201447629fSBarry Smith for (; j<size; j++) { 3211447629fSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 32276ec1555SBarry Smith sizes[2*j] += 2; /* num of indices to be sent - in pairs (ip,ia) */ 32376ec1555SBarry Smith sizes[2*j+1] = 1; /* send to proc[j] */ 3241447629fSBarry Smith owner[i] = j; 3251447629fSBarry Smith break; 3261447629fSBarry Smith } 3271447629fSBarry Smith } 3281447629fSBarry Smith } 32976ec1555SBarry Smith sizes[2*rank]=sizes[2*rank+1]=0; /* do not receive from self! */ 3301447629fSBarry Smith nsends = 0; 33176ec1555SBarry Smith for (i=0; i<size; i++) nsends += sizes[2*i+1]; 3321447629fSBarry Smith 3331447629fSBarry Smith /* inform other processors of number of messages and max length*/ 33476ec1555SBarry Smith ierr = PetscMaxSum(comm,sizes,&nmax,&nreceives);CHKERRQ(ierr); 3351447629fSBarry Smith 3361447629fSBarry Smith /* allocate arrays */ 3371447629fSBarry Smith ierr = PetscObjectGetNewTag((PetscObject)ao,&tag);CHKERRQ(ierr); 338dcca6d9dSJed Brown ierr = PetscMalloc2(nreceives*nmax,&rindices,nreceives,&recv_waits);CHKERRQ(ierr); 339dcca6d9dSJed Brown ierr = PetscMalloc3(2*n,&sindices,nsends,&send_waits,nsends,&send_status);CHKERRQ(ierr); 340785e854fSJed Brown ierr = PetscMalloc1(size,&start);CHKERRQ(ierr); 3411447629fSBarry Smith 3421447629fSBarry Smith /* post receives: */ 3431447629fSBarry Smith for (i=0; i<nreceives; i++) { 3441447629fSBarry Smith ierr = MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 3451447629fSBarry Smith } 3461447629fSBarry Smith 3471447629fSBarry Smith /* do sends: 3481447629fSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 3491447629fSBarry Smith the ith processor 3501447629fSBarry Smith */ 3511447629fSBarry Smith start[0] = 0; 35276ec1555SBarry Smith for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2]; 3531447629fSBarry Smith for (i=0; i<n; i++) { 3541447629fSBarry Smith j = owner[i]; 3551447629fSBarry Smith if (j != rank) { 3561447629fSBarry Smith ip = from_array[i]; 3571447629fSBarry Smith ia = to_array[i]; 3581447629fSBarry Smith sindices[start[j]++] = ip; 3591447629fSBarry Smith sindices[start[j]++] = ia; 3601447629fSBarry Smith } else { /* compute my own map */ 3611447629fSBarry Smith ip = from_array[i] - owners[rank]; 3621447629fSBarry Smith ia = to_array[i]; 3631447629fSBarry Smith aomap_loc[ip] = ia; 3641447629fSBarry Smith } 3651447629fSBarry Smith } 3661447629fSBarry Smith 3671447629fSBarry Smith start[0] = 0; 36876ec1555SBarry Smith for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2]; 3691447629fSBarry Smith for (i=0,count=0; i<size; i++) { 37076ec1555SBarry Smith if (sizes[2*i+1]) { 37176ec1555SBarry Smith ierr = MPI_Isend(sindices+start[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count);CHKERRQ(ierr); 3721447629fSBarry Smith count++; 3731447629fSBarry Smith } 3741447629fSBarry Smith } 3751447629fSBarry Smith if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %d != count %d",nsends,count); 3761447629fSBarry Smith 3771447629fSBarry Smith /* wait on sends */ 3781447629fSBarry Smith if (nsends) { 3791447629fSBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 3801447629fSBarry Smith } 3811447629fSBarry Smith 3821447629fSBarry Smith /* recvs */ 3831447629fSBarry Smith count=0; 3841447629fSBarry Smith for (j= nreceives; j>0; j--) { 3851447629fSBarry Smith ierr = MPI_Waitany(nreceives,recv_waits,&widx,&recv_status);CHKERRQ(ierr); 3861447629fSBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRQ(ierr); 3871447629fSBarry Smith rbuf = rindices+nmax*widx; /* global index */ 3881447629fSBarry Smith 3891447629fSBarry Smith /* compute local mapping */ 3901447629fSBarry Smith for (i=0; i<nindices; i+=2) { /* pack aomap_loc */ 3911447629fSBarry Smith ip = rbuf[i] - owners[rank]; /* local index */ 3921447629fSBarry Smith ia = rbuf[i+1]; 3931447629fSBarry Smith aomap_loc[ip] = ia; 3941447629fSBarry Smith } 3951447629fSBarry Smith count++; 3961447629fSBarry Smith } 3971447629fSBarry Smith 3981447629fSBarry Smith ierr = PetscFree(start);CHKERRQ(ierr); 3991447629fSBarry Smith ierr = PetscFree3(sindices,send_waits,send_status);CHKERRQ(ierr); 4001447629fSBarry Smith ierr = PetscFree2(rindices,recv_waits);CHKERRQ(ierr); 4011447629fSBarry Smith ierr = PetscFree(owner);CHKERRQ(ierr); 40276ec1555SBarry Smith ierr = PetscFree(sizes);CHKERRQ(ierr); 4031447629fSBarry Smith PetscFunctionReturn(0); 4041447629fSBarry Smith } 4051447629fSBarry Smith 4061447629fSBarry Smith #undef __FUNCT__ 4071447629fSBarry Smith #define __FUNCT__ "AOCreate_MemoryScalable" 4088cc058d9SJed Brown PETSC_EXTERN PetscErrorCode AOCreate_MemoryScalable(AO ao) 4091447629fSBarry Smith { 4101447629fSBarry Smith PetscErrorCode ierr; 4111447629fSBarry Smith IS isapp=ao->isapp,ispetsc=ao->ispetsc; 4121447629fSBarry Smith const PetscInt *mypetsc,*myapp; 4131447629fSBarry Smith PetscInt napp,n_local,N,i,start,*petsc,*lens,*disp; 4141447629fSBarry Smith MPI_Comm comm; 4151447629fSBarry Smith AO_MemoryScalable *aomems; 4161447629fSBarry Smith PetscLayout map; 4171447629fSBarry Smith PetscMPIInt size,rank; 4181447629fSBarry Smith 4191447629fSBarry Smith PetscFunctionBegin; 420*01e608bcSBarry Smith if (!isapp) SETERRQ(PetscObjectComm((PetscObject)ao),PETSC_ERR_ARG_WRONGSTATE,"AOSetIS() must be called before AOSetType()"); 4211447629fSBarry Smith /* create special struct aomems */ 422b00a9115SJed Brown ierr = PetscNewLog(ao,&aomems);CHKERRQ(ierr); 4231447629fSBarry Smith ao->data = (void*) aomems; 4241447629fSBarry Smith ierr = PetscMemcpy(ao->ops,&AOOps_MemoryScalable,sizeof(struct _AOOps));CHKERRQ(ierr); 4251447629fSBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)ao,AOMEMORYSCALABLE);CHKERRQ(ierr); 4261447629fSBarry Smith 4271447629fSBarry Smith /* transmit all local lengths of isapp to all processors */ 4281447629fSBarry Smith ierr = PetscObjectGetComm((PetscObject)isapp,&comm);CHKERRQ(ierr); 4291447629fSBarry Smith ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 4301447629fSBarry Smith ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 431dcca6d9dSJed Brown ierr = PetscMalloc2(size,&lens,size,&disp);CHKERRQ(ierr); 4321447629fSBarry Smith ierr = ISGetLocalSize(isapp,&napp);CHKERRQ(ierr); 4331447629fSBarry Smith ierr = MPI_Allgather(&napp, 1, MPIU_INT, lens, 1, MPIU_INT, comm);CHKERRQ(ierr); 4341447629fSBarry Smith 4351447629fSBarry Smith N = 0; 4361447629fSBarry Smith for (i = 0; i < size; i++) { 4371447629fSBarry Smith disp[i] = N; 4381447629fSBarry Smith N += lens[i]; 4391447629fSBarry Smith } 4401447629fSBarry Smith 4411447629fSBarry Smith /* If ispetsc is 0 then use "natural" numbering */ 4421447629fSBarry Smith if (napp) { 4431447629fSBarry Smith if (!ispetsc) { 4441447629fSBarry Smith start = disp[rank]; 445854ce69bSBarry Smith ierr = PetscMalloc1(napp+1, &petsc);CHKERRQ(ierr); 4461447629fSBarry Smith for (i=0; i<napp; i++) petsc[i] = start + i; 4471447629fSBarry Smith } else { 4481447629fSBarry Smith ierr = ISGetIndices(ispetsc,&mypetsc);CHKERRQ(ierr); 4491447629fSBarry Smith petsc = (PetscInt*)mypetsc; 4501447629fSBarry Smith } 4511447629fSBarry Smith } 4521447629fSBarry Smith 4531447629fSBarry Smith /* create a map with global size N - used to determine the local sizes of ao - shall we use local napp instead of N? */ 4541447629fSBarry Smith ierr = PetscLayoutCreate(comm,&map);CHKERRQ(ierr); 4551447629fSBarry Smith map->bs = 1; 4561447629fSBarry Smith map->N = N; 4571447629fSBarry Smith ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4581447629fSBarry Smith 4591447629fSBarry Smith ao->N = N; 4601447629fSBarry Smith ao->n = map->n; 4611447629fSBarry Smith aomems->map = map; 4621447629fSBarry Smith 4631447629fSBarry Smith /* create distributed indices app_loc: petsc->app and petsc_loc: app->petsc */ 4641447629fSBarry Smith n_local = map->n; 465dcca6d9dSJed Brown ierr = PetscMalloc2(n_local, &aomems->app_loc,n_local,&aomems->petsc_loc);CHKERRQ(ierr); 4663bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)ao,2*n_local*sizeof(PetscInt));CHKERRQ(ierr); 4671447629fSBarry Smith ierr = PetscMemzero(aomems->app_loc,n_local*sizeof(PetscInt));CHKERRQ(ierr); 4681447629fSBarry Smith ierr = PetscMemzero(aomems->petsc_loc,n_local*sizeof(PetscInt));CHKERRQ(ierr); 4691447629fSBarry Smith ierr = ISGetIndices(isapp,&myapp);CHKERRQ(ierr); 4701447629fSBarry Smith 4711447629fSBarry Smith ierr = AOCreateMemoryScalable_private(comm,napp,petsc,myapp,ao,aomems->app_loc);CHKERRQ(ierr); 4721447629fSBarry Smith ierr = AOCreateMemoryScalable_private(comm,napp,myapp,petsc,ao,aomems->petsc_loc);CHKERRQ(ierr); 4731447629fSBarry Smith 4741447629fSBarry Smith ierr = ISRestoreIndices(isapp,&myapp);CHKERRQ(ierr); 4751447629fSBarry Smith if (napp) { 4761447629fSBarry Smith if (ispetsc) { 4771447629fSBarry Smith ierr = ISRestoreIndices(ispetsc,&mypetsc);CHKERRQ(ierr); 4781447629fSBarry Smith } else { 4791447629fSBarry Smith ierr = PetscFree(petsc);CHKERRQ(ierr); 4801447629fSBarry Smith } 4811447629fSBarry Smith } 4821447629fSBarry Smith ierr = PetscFree2(lens,disp);CHKERRQ(ierr); 4831447629fSBarry Smith PetscFunctionReturn(0); 4841447629fSBarry Smith } 4851447629fSBarry Smith 4861447629fSBarry Smith #undef __FUNCT__ 4871447629fSBarry Smith #define __FUNCT__ "AOCreateMemoryScalable" 4881447629fSBarry Smith /*@C 4891447629fSBarry Smith AOCreateMemoryScalable - Creates a memory scalable application ordering using two integer arrays. 4901447629fSBarry Smith 4911447629fSBarry Smith Collective on MPI_Comm 4921447629fSBarry Smith 4931447629fSBarry Smith Input Parameters: 4941447629fSBarry Smith + comm - MPI communicator that is to share AO 4951447629fSBarry Smith . napp - size of integer arrays 4961447629fSBarry Smith . myapp - integer array that defines an ordering 4971447629fSBarry Smith - mypetsc - integer array that defines another ordering (may be NULL to 4981447629fSBarry Smith indicate the natural ordering, that is 0,1,2,3,...) 4991447629fSBarry Smith 5001447629fSBarry Smith Output Parameter: 5011447629fSBarry Smith . aoout - the new application ordering 5021447629fSBarry Smith 5031447629fSBarry Smith Level: beginner 5041447629fSBarry Smith 5051447629fSBarry Smith Notes: The arrays myapp and mypetsc must contain the all the integers 0 to napp-1 with no duplicates; that is there cannot be any "holes" 5061447629fSBarry Smith in the indices. Use AOCreateMapping() or AOCreateMappingIS() if you wish to have "holes" in the indices. 5071447629fSBarry Smith Comparing with AOCreateBasic(), this routine trades memory with message communication. 5081447629fSBarry Smith 5091447629fSBarry Smith .keywords: AO, create 5101447629fSBarry Smith 5111447629fSBarry Smith .seealso: AOCreateMemoryScalableIS(), AODestroy(), AOPetscToApplication(), AOApplicationToPetsc() 5121447629fSBarry Smith @*/ 5131447629fSBarry Smith PetscErrorCode AOCreateMemoryScalable(MPI_Comm comm,PetscInt napp,const PetscInt myapp[],const PetscInt mypetsc[],AO *aoout) 5141447629fSBarry Smith { 5151447629fSBarry Smith PetscErrorCode ierr; 5161447629fSBarry Smith IS isapp,ispetsc; 5171447629fSBarry Smith const PetscInt *app=myapp,*petsc=mypetsc; 5181447629fSBarry Smith 5191447629fSBarry Smith PetscFunctionBegin; 5201447629fSBarry Smith ierr = ISCreateGeneral(comm,napp,app,PETSC_USE_POINTER,&isapp);CHKERRQ(ierr); 5211447629fSBarry Smith if (mypetsc) { 5221447629fSBarry Smith ierr = ISCreateGeneral(comm,napp,petsc,PETSC_USE_POINTER,&ispetsc);CHKERRQ(ierr); 5231447629fSBarry Smith } else { 5241447629fSBarry Smith ispetsc = NULL; 5251447629fSBarry Smith } 5261447629fSBarry Smith ierr = AOCreateMemoryScalableIS(isapp,ispetsc,aoout);CHKERRQ(ierr); 5271447629fSBarry Smith ierr = ISDestroy(&isapp);CHKERRQ(ierr); 5281447629fSBarry Smith if (mypetsc) { 5291447629fSBarry Smith ierr = ISDestroy(&ispetsc);CHKERRQ(ierr); 5301447629fSBarry Smith } 5311447629fSBarry Smith PetscFunctionReturn(0); 5321447629fSBarry Smith } 5331447629fSBarry Smith 5341447629fSBarry Smith #undef __FUNCT__ 5351447629fSBarry Smith #define __FUNCT__ "AOCreateMemoryScalableIS" 5361447629fSBarry Smith /*@C 5371447629fSBarry Smith AOCreateMemoryScalableIS - Creates a memory scalable application ordering using two index sets. 5381447629fSBarry Smith 5391447629fSBarry Smith Collective on IS 5401447629fSBarry Smith 5411447629fSBarry Smith Input Parameters: 5421447629fSBarry Smith + isapp - index set that defines an ordering 5431447629fSBarry Smith - ispetsc - index set that defines another ordering (may be NULL to use the 5441447629fSBarry Smith natural ordering) 5451447629fSBarry Smith 5461447629fSBarry Smith Output Parameter: 5471447629fSBarry Smith . aoout - the new application ordering 5481447629fSBarry Smith 5491447629fSBarry Smith Level: beginner 5501447629fSBarry Smith 5511447629fSBarry Smith Notes: The index sets isapp and ispetsc must contain the all the integers 0 to napp-1 (where napp is the length of the index sets) with no duplicates; 5521447629fSBarry Smith that is there cannot be any "holes". 5531447629fSBarry Smith Comparing with AOCreateBasicIS(), this routine trades memory with message communication. 5541447629fSBarry Smith .keywords: AO, create 5551447629fSBarry Smith 5561447629fSBarry Smith .seealso: AOCreateMemoryScalable(), AODestroy() 5571447629fSBarry Smith @*/ 5581447629fSBarry Smith PetscErrorCode AOCreateMemoryScalableIS(IS isapp,IS ispetsc,AO *aoout) 5591447629fSBarry Smith { 5601447629fSBarry Smith PetscErrorCode ierr; 5611447629fSBarry Smith MPI_Comm comm; 5621447629fSBarry Smith AO ao; 5631447629fSBarry Smith 5641447629fSBarry Smith PetscFunctionBegin; 5651447629fSBarry Smith ierr = PetscObjectGetComm((PetscObject)isapp,&comm);CHKERRQ(ierr); 5661447629fSBarry Smith ierr = AOCreate(comm,&ao);CHKERRQ(ierr); 5671447629fSBarry Smith ierr = AOSetIS(ao,isapp,ispetsc);CHKERRQ(ierr); 5681447629fSBarry Smith ierr = AOSetType(ao,AOMEMORYSCALABLE);CHKERRQ(ierr); 569817ea411SJed Brown ierr = AOViewFromOptions(ao,NULL,"-ao_view");CHKERRQ(ierr); 5701447629fSBarry Smith *aoout = ao; 5711447629fSBarry Smith PetscFunctionReturn(0); 5721447629fSBarry Smith } 573