1*1447629fSBarry Smith 2*1447629fSBarry Smith /* 3*1447629fSBarry Smith The memory scalable AO application ordering routines. These store the 4*1447629fSBarry Smith local orderings on each processor. 5*1447629fSBarry Smith */ 6*1447629fSBarry Smith 7*1447629fSBarry Smith #include <../src/vec/is/ao/aoimpl.h> /*I "petscao.h" I*/ 8*1447629fSBarry Smith 9*1447629fSBarry Smith typedef struct { 10*1447629fSBarry Smith PetscInt *app_loc; /* app_loc[i] is the partner for the ith local PETSc slot */ 11*1447629fSBarry Smith PetscInt *petsc_loc; /* petsc_loc[j] is the partner for the jth local app slot */ 12*1447629fSBarry Smith PetscLayout map; /* determines the local sizes of ao */ 13*1447629fSBarry Smith } AO_MemoryScalable; 14*1447629fSBarry Smith 15*1447629fSBarry Smith /* 16*1447629fSBarry Smith All processors have the same data so processor 1 prints it 17*1447629fSBarry Smith */ 18*1447629fSBarry Smith #undef __FUNCT__ 19*1447629fSBarry Smith #define __FUNCT__ "AOView_MemoryScalable" 20*1447629fSBarry Smith PetscErrorCode AOView_MemoryScalable(AO ao,PetscViewer viewer) 21*1447629fSBarry Smith { 22*1447629fSBarry Smith PetscErrorCode ierr; 23*1447629fSBarry Smith PetscMPIInt rank,size; 24*1447629fSBarry Smith AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data; 25*1447629fSBarry Smith PetscBool iascii; 26*1447629fSBarry Smith PetscMPIInt tag_app,tag_petsc; 27*1447629fSBarry Smith PetscLayout map = aomems->map; 28*1447629fSBarry Smith PetscInt *app,*app_loc,*petsc,*petsc_loc,len,i,j; 29*1447629fSBarry Smith MPI_Status status; 30*1447629fSBarry Smith 31*1447629fSBarry Smith PetscFunctionBegin; 32*1447629fSBarry Smith ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 33*1447629fSBarry Smith if (!iascii) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not supported for AO MemoryScalable",((PetscObject)viewer)->type_name); 34*1447629fSBarry Smith 35*1447629fSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)ao),&rank);CHKERRQ(ierr); 36*1447629fSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)ao),&size);CHKERRQ(ierr); 37*1447629fSBarry Smith 38*1447629fSBarry Smith ierr = PetscObjectGetNewTag((PetscObject)ao,&tag_app);CHKERRQ(ierr); 39*1447629fSBarry Smith ierr = PetscObjectGetNewTag((PetscObject)ao,&tag_petsc);CHKERRQ(ierr); 40*1447629fSBarry Smith 41*1447629fSBarry Smith if (!rank) { 42*1447629fSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"Number of elements in ordering %D\n",ao->N);CHKERRQ(ierr); 43*1447629fSBarry Smith ierr = PetscViewerASCIIPrintf(viewer, "PETSc->App App->PETSc\n");CHKERRQ(ierr); 44*1447629fSBarry Smith 45*1447629fSBarry Smith ierr = PetscMalloc2(map->N,PetscInt,&app,map->N,PetscInt,&petsc);CHKERRQ(ierr); 46*1447629fSBarry Smith len = map->n; 47*1447629fSBarry Smith /* print local AO */ 48*1447629fSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"Process [%D]\n",rank);CHKERRQ(ierr); 49*1447629fSBarry Smith for (i=0; i<len; i++) { 50*1447629fSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"%3D %3D %3D %3D\n",i,aomems->app_loc[i],i,aomems->petsc_loc[i]);CHKERRQ(ierr); 51*1447629fSBarry Smith } 52*1447629fSBarry Smith 53*1447629fSBarry Smith /* recv and print off-processor's AO */ 54*1447629fSBarry Smith for (i=1; i<size; i++) { 55*1447629fSBarry Smith len = map->range[i+1] - map->range[i]; 56*1447629fSBarry Smith app_loc = app + map->range[i]; 57*1447629fSBarry Smith petsc_loc = petsc+ map->range[i]; 58*1447629fSBarry Smith ierr = MPI_Recv(app_loc,(PetscMPIInt)len,MPIU_INT,i,tag_app,PetscObjectComm((PetscObject)ao),&status);CHKERRQ(ierr); 59*1447629fSBarry Smith ierr = MPI_Recv(petsc_loc,(PetscMPIInt)len,MPIU_INT,i,tag_petsc,PetscObjectComm((PetscObject)ao),&status);CHKERRQ(ierr); 60*1447629fSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"Process [%D]\n",i);CHKERRQ(ierr); 61*1447629fSBarry Smith for (j=0; j<len; j++) { 62*1447629fSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"%3D %3D %3D %3D\n",map->range[i]+j,app_loc[j],map->range[i]+j,petsc_loc[j]);CHKERRQ(ierr); 63*1447629fSBarry Smith } 64*1447629fSBarry Smith } 65*1447629fSBarry Smith ierr = PetscFree2(app,petsc);CHKERRQ(ierr); 66*1447629fSBarry Smith 67*1447629fSBarry Smith } else { 68*1447629fSBarry Smith /* send values */ 69*1447629fSBarry Smith ierr = MPI_Send((void*)aomems->app_loc,map->n,MPIU_INT,0,tag_app,PetscObjectComm((PetscObject)ao));CHKERRQ(ierr); 70*1447629fSBarry Smith ierr = MPI_Send((void*)aomems->petsc_loc,map->n,MPIU_INT,0,tag_petsc,PetscObjectComm((PetscObject)ao));CHKERRQ(ierr); 71*1447629fSBarry Smith } 72*1447629fSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 73*1447629fSBarry Smith PetscFunctionReturn(0); 74*1447629fSBarry Smith } 75*1447629fSBarry Smith 76*1447629fSBarry Smith #undef __FUNCT__ 77*1447629fSBarry Smith #define __FUNCT__ "AODestroy_MemoryScalable" 78*1447629fSBarry Smith PetscErrorCode AODestroy_MemoryScalable(AO ao) 79*1447629fSBarry Smith { 80*1447629fSBarry Smith AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data; 81*1447629fSBarry Smith PetscErrorCode ierr; 82*1447629fSBarry Smith 83*1447629fSBarry Smith PetscFunctionBegin; 84*1447629fSBarry Smith ierr = PetscFree2(aomems->app_loc,aomems->petsc_loc);CHKERRQ(ierr); 85*1447629fSBarry Smith ierr = PetscLayoutDestroy(&aomems->map);CHKERRQ(ierr); 86*1447629fSBarry Smith ierr = PetscFree(aomems);CHKERRQ(ierr); 87*1447629fSBarry Smith PetscFunctionReturn(0); 88*1447629fSBarry Smith } 89*1447629fSBarry Smith 90*1447629fSBarry Smith /* 91*1447629fSBarry Smith Input Parameters: 92*1447629fSBarry Smith + ao - the application ordering context 93*1447629fSBarry Smith . n - the number of integers in ia[] 94*1447629fSBarry Smith . ia - the integers; these are replaced with their mapped value 95*1447629fSBarry Smith - maploc - app_loc or petsc_loc in struct "AO_MemoryScalable" 96*1447629fSBarry Smith 97*1447629fSBarry Smith Output Parameter: 98*1447629fSBarry Smith . ia - the mapped interges 99*1447629fSBarry Smith */ 100*1447629fSBarry Smith #undef __FUNCT__ 101*1447629fSBarry Smith #define __FUNCT__ "AOMap_MemoryScalable_private" 102*1447629fSBarry Smith PetscErrorCode AOMap_MemoryScalable_private(AO ao,PetscInt n,PetscInt *ia,PetscInt *maploc) 103*1447629fSBarry Smith { 104*1447629fSBarry Smith PetscErrorCode ierr; 105*1447629fSBarry Smith AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data; 106*1447629fSBarry Smith MPI_Comm comm; 107*1447629fSBarry Smith PetscMPIInt rank,size,tag1,tag2; 108*1447629fSBarry Smith PetscInt *owner,*start,*nprocs,nsends,nreceives; 109*1447629fSBarry Smith PetscInt nmax,count,*sindices,*rindices,i,j,idx,lastidx,*sindices2,*rindices2; 110*1447629fSBarry Smith PetscInt *owners = aomems->map->range; 111*1447629fSBarry Smith MPI_Request *send_waits,*recv_waits,*send_waits2,*recv_waits2; 112*1447629fSBarry Smith MPI_Status recv_status; 113*1447629fSBarry Smith PetscMPIInt nindices,source,widx; 114*1447629fSBarry Smith PetscInt *rbuf,*sbuf; 115*1447629fSBarry Smith MPI_Status *send_status,*send_status2; 116*1447629fSBarry Smith 117*1447629fSBarry Smith PetscFunctionBegin; 118*1447629fSBarry Smith ierr = PetscObjectGetComm((PetscObject)ao,&comm);CHKERRQ(ierr); 119*1447629fSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 120*1447629fSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 121*1447629fSBarry Smith 122*1447629fSBarry Smith /* first count number of contributors to each processor */ 123*1447629fSBarry Smith ierr = PetscMalloc2(2*size,PetscInt,&nprocs,size,PetscInt,&start);CHKERRQ(ierr); 124*1447629fSBarry Smith ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 125*1447629fSBarry Smith ierr = PetscMalloc(n*sizeof(PetscInt),&owner);CHKERRQ(ierr); 126*1447629fSBarry Smith ierr = PetscMemzero(owner,n*sizeof(PetscInt));CHKERRQ(ierr); 127*1447629fSBarry Smith 128*1447629fSBarry Smith j = 0; 129*1447629fSBarry Smith lastidx = -1; 130*1447629fSBarry Smith for (i=0; i<n; i++) { 131*1447629fSBarry Smith /* if indices are NOT locally sorted, need to start search at the beginning */ 132*1447629fSBarry Smith if (lastidx > (idx = ia[i])) j = 0; 133*1447629fSBarry Smith lastidx = idx; 134*1447629fSBarry Smith for (; j<size; j++) { 135*1447629fSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 136*1447629fSBarry Smith nprocs[2*j]++; /* num of indices to be sent */ 137*1447629fSBarry Smith nprocs[2*j+1] = 1; /* send to proc[j] */ 138*1447629fSBarry Smith owner[i] = j; 139*1447629fSBarry Smith break; 140*1447629fSBarry Smith } 141*1447629fSBarry Smith } 142*1447629fSBarry Smith } 143*1447629fSBarry Smith nprocs[2*rank]=nprocs[2*rank+1]=0; /* do not receive from self! */ 144*1447629fSBarry Smith nsends = 0; 145*1447629fSBarry Smith for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 146*1447629fSBarry Smith 147*1447629fSBarry Smith /* inform other processors of number of messages and max length*/ 148*1447629fSBarry Smith ierr = PetscMaxSum(comm,nprocs,&nmax,&nreceives);CHKERRQ(ierr); 149*1447629fSBarry Smith 150*1447629fSBarry Smith /* allocate arrays */ 151*1447629fSBarry Smith ierr = PetscObjectGetNewTag((PetscObject)ao,&tag1);CHKERRQ(ierr); 152*1447629fSBarry Smith ierr = PetscObjectGetNewTag((PetscObject)ao,&tag2);CHKERRQ(ierr); 153*1447629fSBarry Smith 154*1447629fSBarry Smith ierr = PetscMalloc2(nreceives*nmax,PetscInt,&rindices,nreceives,MPI_Request,&recv_waits);CHKERRQ(ierr); 155*1447629fSBarry Smith ierr = PetscMalloc2(nsends*nmax,PetscInt,&rindices2,nsends,MPI_Request,&recv_waits2);CHKERRQ(ierr); 156*1447629fSBarry Smith 157*1447629fSBarry Smith ierr = PetscMalloc3(n,PetscInt,&sindices,nsends,MPI_Request,&send_waits,nsends,MPI_Status,&send_status);CHKERRQ(ierr); 158*1447629fSBarry Smith ierr = PetscMalloc3(n,PetscInt,&sindices2,nreceives,MPI_Request,&send_waits2,nreceives,MPI_Status,&send_status2);CHKERRQ(ierr); 159*1447629fSBarry Smith 160*1447629fSBarry Smith /* post 1st receives: receive others requests 161*1447629fSBarry Smith since we don't know how long each individual message is we 162*1447629fSBarry Smith allocate the largest needed buffer for each receive. Potentially 163*1447629fSBarry Smith this is a lot of wasted space. 164*1447629fSBarry Smith */ 165*1447629fSBarry Smith for (i=0,count=0; i<nreceives; i++) { 166*1447629fSBarry Smith ierr = MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);CHKERRQ(ierr); 167*1447629fSBarry Smith } 168*1447629fSBarry Smith 169*1447629fSBarry Smith /* do 1st sends: 170*1447629fSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 171*1447629fSBarry Smith the ith processor 172*1447629fSBarry Smith */ 173*1447629fSBarry Smith start[0] = 0; 174*1447629fSBarry Smith for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2]; 175*1447629fSBarry Smith for (i=0; i<n; i++) { 176*1447629fSBarry Smith j = owner[i]; 177*1447629fSBarry Smith if (j != rank) { 178*1447629fSBarry Smith sindices[start[j]++] = ia[i]; 179*1447629fSBarry Smith } else { /* compute my own map */ 180*1447629fSBarry Smith if (ia[i] >= owners[rank] && ia[i] < owners[rank+1]) { 181*1447629fSBarry Smith ia[i] = maploc[ia[i]-owners[rank]]; 182*1447629fSBarry Smith } else { 183*1447629fSBarry Smith ia[i] = -1; /* ia[i] is not in the range of 0 and N-1, maps it to -1 */ 184*1447629fSBarry Smith } 185*1447629fSBarry Smith } 186*1447629fSBarry Smith } 187*1447629fSBarry Smith 188*1447629fSBarry Smith start[0] = 0; 189*1447629fSBarry Smith for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2]; 190*1447629fSBarry Smith for (i=0,count=0; i<size; i++) { 191*1447629fSBarry Smith if (nprocs[2*i+1]) { 192*1447629fSBarry Smith /* send my request to others */ 193*1447629fSBarry Smith ierr = MPI_Isend(sindices+start[i],nprocs[2*i],MPIU_INT,i,tag1,comm,send_waits+count);CHKERRQ(ierr); 194*1447629fSBarry Smith /* post receive for the answer of my request */ 195*1447629fSBarry Smith ierr = MPI_Irecv(sindices2+start[i],nprocs[2*i],MPIU_INT,i,tag2,comm,recv_waits2+count);CHKERRQ(ierr); 196*1447629fSBarry Smith count++; 197*1447629fSBarry Smith } 198*1447629fSBarry Smith } 199*1447629fSBarry Smith if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %d != count %d",nsends,count); 200*1447629fSBarry Smith 201*1447629fSBarry Smith /* wait on 1st sends */ 202*1447629fSBarry Smith if (nsends) { 203*1447629fSBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 204*1447629fSBarry Smith } 205*1447629fSBarry Smith 206*1447629fSBarry Smith /* 1st recvs: other's requests */ 207*1447629fSBarry Smith for (j=0; j< nreceives; j++) { 208*1447629fSBarry Smith ierr = MPI_Waitany(nreceives,recv_waits,&widx,&recv_status);CHKERRQ(ierr); /* idx: index of handle for operation that completed */ 209*1447629fSBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRQ(ierr); 210*1447629fSBarry Smith rbuf = rindices+nmax*widx; /* global index */ 211*1447629fSBarry Smith source = recv_status.MPI_SOURCE; 212*1447629fSBarry Smith 213*1447629fSBarry Smith /* compute mapping */ 214*1447629fSBarry Smith sbuf = rbuf; 215*1447629fSBarry Smith for (i=0; i<nindices; i++) sbuf[i] = maploc[rbuf[i]-owners[rank]]; 216*1447629fSBarry Smith 217*1447629fSBarry Smith /* send mapping back to the sender */ 218*1447629fSBarry Smith ierr = MPI_Isend(sbuf,nindices,MPIU_INT,source,tag2,comm,send_waits2+widx);CHKERRQ(ierr); 219*1447629fSBarry Smith } 220*1447629fSBarry Smith 221*1447629fSBarry Smith /* wait on 2nd sends */ 222*1447629fSBarry Smith if (nreceives) { 223*1447629fSBarry Smith ierr = MPI_Waitall(nreceives,send_waits2,send_status2);CHKERRQ(ierr); 224*1447629fSBarry Smith } 225*1447629fSBarry Smith 226*1447629fSBarry Smith /* 2nd recvs: for the answer of my request */ 227*1447629fSBarry Smith for (j=0; j< nsends; j++) { 228*1447629fSBarry Smith ierr = MPI_Waitany(nsends,recv_waits2,&widx,&recv_status);CHKERRQ(ierr); 229*1447629fSBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRQ(ierr); 230*1447629fSBarry Smith source = recv_status.MPI_SOURCE; 231*1447629fSBarry Smith /* pack output ia[] */ 232*1447629fSBarry Smith rbuf = sindices2+start[source]; 233*1447629fSBarry Smith count = 0; 234*1447629fSBarry Smith for (i=0; i<n; i++) { 235*1447629fSBarry Smith if (source == owner[i]) ia[i] = rbuf[count++]; 236*1447629fSBarry Smith } 237*1447629fSBarry Smith } 238*1447629fSBarry Smith 239*1447629fSBarry Smith /* free arrays */ 240*1447629fSBarry Smith ierr = PetscFree2(nprocs,start);CHKERRQ(ierr); 241*1447629fSBarry Smith ierr = PetscFree(owner);CHKERRQ(ierr); 242*1447629fSBarry Smith ierr = PetscFree2(rindices,recv_waits);CHKERRQ(ierr); 243*1447629fSBarry Smith ierr = PetscFree2(rindices2,recv_waits2);CHKERRQ(ierr); 244*1447629fSBarry Smith ierr = PetscFree3(sindices,send_waits,send_status);CHKERRQ(ierr); 245*1447629fSBarry Smith ierr = PetscFree3(sindices2,send_waits2,send_status2);CHKERRQ(ierr); 246*1447629fSBarry Smith PetscFunctionReturn(0); 247*1447629fSBarry Smith } 248*1447629fSBarry Smith 249*1447629fSBarry Smith #undef __FUNCT__ 250*1447629fSBarry Smith #define __FUNCT__ "AOPetscToApplication_MemoryScalable" 251*1447629fSBarry Smith PetscErrorCode AOPetscToApplication_MemoryScalable(AO ao,PetscInt n,PetscInt *ia) 252*1447629fSBarry Smith { 253*1447629fSBarry Smith PetscErrorCode ierr; 254*1447629fSBarry Smith AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data; 255*1447629fSBarry Smith PetscInt *app_loc = aomems->app_loc; 256*1447629fSBarry Smith 257*1447629fSBarry Smith PetscFunctionBegin; 258*1447629fSBarry Smith ierr = AOMap_MemoryScalable_private(ao,n,ia,app_loc);CHKERRQ(ierr); 259*1447629fSBarry Smith PetscFunctionReturn(0); 260*1447629fSBarry Smith } 261*1447629fSBarry Smith 262*1447629fSBarry Smith #undef __FUNCT__ 263*1447629fSBarry Smith #define __FUNCT__ "AOApplicationToPetsc_MemoryScalable" 264*1447629fSBarry Smith PetscErrorCode AOApplicationToPetsc_MemoryScalable(AO ao,PetscInt n,PetscInt *ia) 265*1447629fSBarry Smith { 266*1447629fSBarry Smith PetscErrorCode ierr; 267*1447629fSBarry Smith AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data; 268*1447629fSBarry Smith PetscInt *petsc_loc = aomems->petsc_loc; 269*1447629fSBarry Smith 270*1447629fSBarry Smith PetscFunctionBegin; 271*1447629fSBarry Smith ierr = AOMap_MemoryScalable_private(ao,n,ia,petsc_loc);CHKERRQ(ierr); 272*1447629fSBarry Smith PetscFunctionReturn(0); 273*1447629fSBarry Smith } 274*1447629fSBarry Smith 275*1447629fSBarry Smith static struct _AOOps AOOps_MemoryScalable = { 276*1447629fSBarry Smith AOView_MemoryScalable, 277*1447629fSBarry Smith AODestroy_MemoryScalable, 278*1447629fSBarry Smith AOPetscToApplication_MemoryScalable, 279*1447629fSBarry Smith AOApplicationToPetsc_MemoryScalable, 280*1447629fSBarry Smith 0, 281*1447629fSBarry Smith 0, 282*1447629fSBarry Smith 0, 283*1447629fSBarry Smith 0 284*1447629fSBarry Smith }; 285*1447629fSBarry Smith 286*1447629fSBarry Smith #undef __FUNCT__ 287*1447629fSBarry Smith #define __FUNCT__ "AOCreateMemoryScalable_private" 288*1447629fSBarry Smith PetscErrorCode AOCreateMemoryScalable_private(MPI_Comm comm,PetscInt napp,const PetscInt from_array[],const PetscInt to_array[],AO ao, PetscInt *aomap_loc) 289*1447629fSBarry Smith { 290*1447629fSBarry Smith PetscErrorCode ierr; 291*1447629fSBarry Smith AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data; 292*1447629fSBarry Smith PetscLayout map = aomems->map; 293*1447629fSBarry Smith PetscInt n_local = map->n,i,j; 294*1447629fSBarry Smith PetscMPIInt rank,size,tag; 295*1447629fSBarry Smith PetscInt *owner,*start,*nprocs,nsends,nreceives; 296*1447629fSBarry Smith PetscInt nmax,count,*sindices,*rindices,idx,lastidx; 297*1447629fSBarry Smith PetscInt *owners = aomems->map->range; 298*1447629fSBarry Smith MPI_Request *send_waits,*recv_waits; 299*1447629fSBarry Smith MPI_Status recv_status; 300*1447629fSBarry Smith PetscMPIInt nindices,widx; 301*1447629fSBarry Smith PetscInt *rbuf; 302*1447629fSBarry Smith PetscInt n=napp,ip,ia; 303*1447629fSBarry Smith MPI_Status *send_status; 304*1447629fSBarry Smith 305*1447629fSBarry Smith PetscFunctionBegin; 306*1447629fSBarry Smith ierr = PetscMemzero(aomap_loc,n_local*sizeof(PetscInt));CHKERRQ(ierr); 307*1447629fSBarry Smith 308*1447629fSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 309*1447629fSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 310*1447629fSBarry Smith 311*1447629fSBarry Smith /* first count number of contributors (of from_array[]) to each processor */ 312*1447629fSBarry Smith ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 313*1447629fSBarry Smith ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 314*1447629fSBarry Smith ierr = PetscMalloc(n*sizeof(PetscInt),&owner);CHKERRQ(ierr); 315*1447629fSBarry Smith 316*1447629fSBarry Smith j = 0; 317*1447629fSBarry Smith lastidx = -1; 318*1447629fSBarry Smith for (i=0; i<n; i++) { 319*1447629fSBarry Smith /* if indices are NOT locally sorted, need to start search at the beginning */ 320*1447629fSBarry Smith if (lastidx > (idx = from_array[i])) j = 0; 321*1447629fSBarry Smith lastidx = idx; 322*1447629fSBarry Smith for (; j<size; j++) { 323*1447629fSBarry Smith if (idx >= owners[j] && idx < owners[j+1]) { 324*1447629fSBarry Smith nprocs[2*j] += 2; /* num of indices to be sent - in pairs (ip,ia) */ 325*1447629fSBarry Smith nprocs[2*j+1] = 1; /* send to proc[j] */ 326*1447629fSBarry Smith owner[i] = j; 327*1447629fSBarry Smith break; 328*1447629fSBarry Smith } 329*1447629fSBarry Smith } 330*1447629fSBarry Smith } 331*1447629fSBarry Smith nprocs[2*rank]=nprocs[2*rank+1]=0; /* do not receive from self! */ 332*1447629fSBarry Smith nsends = 0; 333*1447629fSBarry Smith for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 334*1447629fSBarry Smith 335*1447629fSBarry Smith /* inform other processors of number of messages and max length*/ 336*1447629fSBarry Smith ierr = PetscMaxSum(comm,nprocs,&nmax,&nreceives);CHKERRQ(ierr); 337*1447629fSBarry Smith 338*1447629fSBarry Smith /* allocate arrays */ 339*1447629fSBarry Smith ierr = PetscObjectGetNewTag((PetscObject)ao,&tag);CHKERRQ(ierr); 340*1447629fSBarry Smith ierr = PetscMalloc2(nreceives*nmax,PetscInt,&rindices,nreceives,MPI_Request,&recv_waits);CHKERRQ(ierr); 341*1447629fSBarry Smith ierr = PetscMalloc3(2*n,PetscInt,&sindices,nsends,MPI_Request,&send_waits,nsends,MPI_Status,&send_status);CHKERRQ(ierr); 342*1447629fSBarry Smith ierr = PetscMalloc(size*sizeof(PetscInt),&start);CHKERRQ(ierr); 343*1447629fSBarry Smith 344*1447629fSBarry Smith /* post receives: */ 345*1447629fSBarry Smith for (i=0; i<nreceives; i++) { 346*1447629fSBarry Smith ierr = MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 347*1447629fSBarry Smith } 348*1447629fSBarry Smith 349*1447629fSBarry Smith /* do sends: 350*1447629fSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to 351*1447629fSBarry Smith the ith processor 352*1447629fSBarry Smith */ 353*1447629fSBarry Smith start[0] = 0; 354*1447629fSBarry Smith for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2]; 355*1447629fSBarry Smith for (i=0; i<n; i++) { 356*1447629fSBarry Smith j = owner[i]; 357*1447629fSBarry Smith if (j != rank) { 358*1447629fSBarry Smith ip = from_array[i]; 359*1447629fSBarry Smith ia = to_array[i]; 360*1447629fSBarry Smith sindices[start[j]++] = ip; 361*1447629fSBarry Smith sindices[start[j]++] = ia; 362*1447629fSBarry Smith } else { /* compute my own map */ 363*1447629fSBarry Smith ip = from_array[i] - owners[rank]; 364*1447629fSBarry Smith ia = to_array[i]; 365*1447629fSBarry Smith aomap_loc[ip] = ia; 366*1447629fSBarry Smith } 367*1447629fSBarry Smith } 368*1447629fSBarry Smith 369*1447629fSBarry Smith start[0] = 0; 370*1447629fSBarry Smith for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2]; 371*1447629fSBarry Smith for (i=0,count=0; i<size; i++) { 372*1447629fSBarry Smith if (nprocs[2*i+1]) { 373*1447629fSBarry Smith ierr = MPI_Isend(sindices+start[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count);CHKERRQ(ierr); 374*1447629fSBarry Smith count++; 375*1447629fSBarry Smith } 376*1447629fSBarry Smith } 377*1447629fSBarry Smith if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %d != count %d",nsends,count); 378*1447629fSBarry Smith 379*1447629fSBarry Smith /* wait on sends */ 380*1447629fSBarry Smith if (nsends) { 381*1447629fSBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 382*1447629fSBarry Smith } 383*1447629fSBarry Smith 384*1447629fSBarry Smith /* recvs */ 385*1447629fSBarry Smith count=0; 386*1447629fSBarry Smith for (j= nreceives; j>0; j--) { 387*1447629fSBarry Smith ierr = MPI_Waitany(nreceives,recv_waits,&widx,&recv_status);CHKERRQ(ierr); 388*1447629fSBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRQ(ierr); 389*1447629fSBarry Smith rbuf = rindices+nmax*widx; /* global index */ 390*1447629fSBarry Smith 391*1447629fSBarry Smith /* compute local mapping */ 392*1447629fSBarry Smith for (i=0; i<nindices; i+=2) { /* pack aomap_loc */ 393*1447629fSBarry Smith ip = rbuf[i] - owners[rank]; /* local index */ 394*1447629fSBarry Smith ia = rbuf[i+1]; 395*1447629fSBarry Smith aomap_loc[ip] = ia; 396*1447629fSBarry Smith } 397*1447629fSBarry Smith count++; 398*1447629fSBarry Smith } 399*1447629fSBarry Smith 400*1447629fSBarry Smith ierr = PetscFree(start);CHKERRQ(ierr); 401*1447629fSBarry Smith ierr = PetscFree3(sindices,send_waits,send_status);CHKERRQ(ierr); 402*1447629fSBarry Smith ierr = PetscFree2(rindices,recv_waits);CHKERRQ(ierr); 403*1447629fSBarry Smith ierr = PetscFree(owner);CHKERRQ(ierr); 404*1447629fSBarry Smith ierr = PetscFree(nprocs);CHKERRQ(ierr); 405*1447629fSBarry Smith PetscFunctionReturn(0); 406*1447629fSBarry Smith } 407*1447629fSBarry Smith 408*1447629fSBarry Smith EXTERN_C_BEGIN 409*1447629fSBarry Smith #undef __FUNCT__ 410*1447629fSBarry Smith #define __FUNCT__ "AOCreate_MemoryScalable" 411*1447629fSBarry Smith PetscErrorCode AOCreate_MemoryScalable(AO ao) 412*1447629fSBarry Smith { 413*1447629fSBarry Smith PetscErrorCode ierr; 414*1447629fSBarry Smith IS isapp=ao->isapp,ispetsc=ao->ispetsc; 415*1447629fSBarry Smith const PetscInt *mypetsc,*myapp; 416*1447629fSBarry Smith PetscInt napp,n_local,N,i,start,*petsc,*lens,*disp; 417*1447629fSBarry Smith MPI_Comm comm; 418*1447629fSBarry Smith AO_MemoryScalable *aomems; 419*1447629fSBarry Smith PetscLayout map; 420*1447629fSBarry Smith PetscMPIInt size,rank; 421*1447629fSBarry Smith 422*1447629fSBarry Smith PetscFunctionBegin; 423*1447629fSBarry Smith /* create special struct aomems */ 424*1447629fSBarry Smith ierr = PetscNewLog(ao, AO_MemoryScalable, &aomems);CHKERRQ(ierr); 425*1447629fSBarry Smith ao->data = (void*) aomems; 426*1447629fSBarry Smith ierr = PetscMemcpy(ao->ops,&AOOps_MemoryScalable,sizeof(struct _AOOps));CHKERRQ(ierr); 427*1447629fSBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)ao,AOMEMORYSCALABLE);CHKERRQ(ierr); 428*1447629fSBarry Smith 429*1447629fSBarry Smith /* transmit all local lengths of isapp to all processors */ 430*1447629fSBarry Smith ierr = PetscObjectGetComm((PetscObject)isapp,&comm);CHKERRQ(ierr); 431*1447629fSBarry Smith ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 432*1447629fSBarry Smith ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 433*1447629fSBarry Smith ierr = PetscMalloc2(size,PetscInt,&lens,size,PetscInt,&disp);CHKERRQ(ierr); 434*1447629fSBarry Smith ierr = ISGetLocalSize(isapp,&napp);CHKERRQ(ierr); 435*1447629fSBarry Smith ierr = MPI_Allgather(&napp, 1, MPIU_INT, lens, 1, MPIU_INT, comm);CHKERRQ(ierr); 436*1447629fSBarry Smith 437*1447629fSBarry Smith N = 0; 438*1447629fSBarry Smith for (i = 0; i < size; i++) { 439*1447629fSBarry Smith disp[i] = N; 440*1447629fSBarry Smith N += lens[i]; 441*1447629fSBarry Smith } 442*1447629fSBarry Smith 443*1447629fSBarry Smith /* If ispetsc is 0 then use "natural" numbering */ 444*1447629fSBarry Smith if (napp) { 445*1447629fSBarry Smith if (!ispetsc) { 446*1447629fSBarry Smith start = disp[rank]; 447*1447629fSBarry Smith ierr = PetscMalloc((napp+1) * sizeof(PetscInt), &petsc);CHKERRQ(ierr); 448*1447629fSBarry Smith for (i=0; i<napp; i++) petsc[i] = start + i; 449*1447629fSBarry Smith } else { 450*1447629fSBarry Smith ierr = ISGetIndices(ispetsc,&mypetsc);CHKERRQ(ierr); 451*1447629fSBarry Smith petsc = (PetscInt*)mypetsc; 452*1447629fSBarry Smith } 453*1447629fSBarry Smith } 454*1447629fSBarry Smith 455*1447629fSBarry Smith /* create a map with global size N - used to determine the local sizes of ao - shall we use local napp instead of N? */ 456*1447629fSBarry Smith ierr = PetscLayoutCreate(comm,&map);CHKERRQ(ierr); 457*1447629fSBarry Smith map->bs = 1; 458*1447629fSBarry Smith map->N = N; 459*1447629fSBarry Smith ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 460*1447629fSBarry Smith 461*1447629fSBarry Smith ao->N = N; 462*1447629fSBarry Smith ao->n = map->n; 463*1447629fSBarry Smith aomems->map = map; 464*1447629fSBarry Smith 465*1447629fSBarry Smith /* create distributed indices app_loc: petsc->app and petsc_loc: app->petsc */ 466*1447629fSBarry Smith n_local = map->n; 467*1447629fSBarry Smith ierr = PetscMalloc2(n_local,PetscInt, &aomems->app_loc,n_local,PetscInt,&aomems->petsc_loc);CHKERRQ(ierr); 468*1447629fSBarry Smith ierr = PetscLogObjectMemory(ao,2*n_local*sizeof(PetscInt));CHKERRQ(ierr); 469*1447629fSBarry Smith ierr = PetscMemzero(aomems->app_loc,n_local*sizeof(PetscInt));CHKERRQ(ierr); 470*1447629fSBarry Smith ierr = PetscMemzero(aomems->petsc_loc,n_local*sizeof(PetscInt));CHKERRQ(ierr); 471*1447629fSBarry Smith ierr = ISGetIndices(isapp,&myapp);CHKERRQ(ierr); 472*1447629fSBarry Smith 473*1447629fSBarry Smith ierr = AOCreateMemoryScalable_private(comm,napp,petsc,myapp,ao,aomems->app_loc);CHKERRQ(ierr); 474*1447629fSBarry Smith ierr = AOCreateMemoryScalable_private(comm,napp,myapp,petsc,ao,aomems->petsc_loc);CHKERRQ(ierr); 475*1447629fSBarry Smith 476*1447629fSBarry Smith ierr = ISRestoreIndices(isapp,&myapp);CHKERRQ(ierr); 477*1447629fSBarry Smith if (napp) { 478*1447629fSBarry Smith if (ispetsc) { 479*1447629fSBarry Smith ierr = ISRestoreIndices(ispetsc,&mypetsc);CHKERRQ(ierr); 480*1447629fSBarry Smith } else { 481*1447629fSBarry Smith ierr = PetscFree(petsc);CHKERRQ(ierr); 482*1447629fSBarry Smith } 483*1447629fSBarry Smith } 484*1447629fSBarry Smith ierr = PetscFree2(lens,disp);CHKERRQ(ierr); 485*1447629fSBarry Smith PetscFunctionReturn(0); 486*1447629fSBarry Smith } 487*1447629fSBarry Smith EXTERN_C_END 488*1447629fSBarry Smith 489*1447629fSBarry Smith #undef __FUNCT__ 490*1447629fSBarry Smith #define __FUNCT__ "AOCreateMemoryScalable" 491*1447629fSBarry Smith /*@C 492*1447629fSBarry Smith AOCreateMemoryScalable - Creates a memory scalable application ordering using two integer arrays. 493*1447629fSBarry Smith 494*1447629fSBarry Smith Collective on MPI_Comm 495*1447629fSBarry Smith 496*1447629fSBarry Smith Input Parameters: 497*1447629fSBarry Smith + comm - MPI communicator that is to share AO 498*1447629fSBarry Smith . napp - size of integer arrays 499*1447629fSBarry Smith . myapp - integer array that defines an ordering 500*1447629fSBarry Smith - mypetsc - integer array that defines another ordering (may be NULL to 501*1447629fSBarry Smith indicate the natural ordering, that is 0,1,2,3,...) 502*1447629fSBarry Smith 503*1447629fSBarry Smith Output Parameter: 504*1447629fSBarry Smith . aoout - the new application ordering 505*1447629fSBarry Smith 506*1447629fSBarry Smith Level: beginner 507*1447629fSBarry Smith 508*1447629fSBarry Smith Notes: The arrays myapp and mypetsc must contain the all the integers 0 to napp-1 with no duplicates; that is there cannot be any "holes" 509*1447629fSBarry Smith in the indices. Use AOCreateMapping() or AOCreateMappingIS() if you wish to have "holes" in the indices. 510*1447629fSBarry Smith Comparing with AOCreateBasic(), this routine trades memory with message communication. 511*1447629fSBarry Smith 512*1447629fSBarry Smith .keywords: AO, create 513*1447629fSBarry Smith 514*1447629fSBarry Smith .seealso: AOCreateMemoryScalableIS(), AODestroy(), AOPetscToApplication(), AOApplicationToPetsc() 515*1447629fSBarry Smith @*/ 516*1447629fSBarry Smith PetscErrorCode AOCreateMemoryScalable(MPI_Comm comm,PetscInt napp,const PetscInt myapp[],const PetscInt mypetsc[],AO *aoout) 517*1447629fSBarry Smith { 518*1447629fSBarry Smith PetscErrorCode ierr; 519*1447629fSBarry Smith IS isapp,ispetsc; 520*1447629fSBarry Smith const PetscInt *app=myapp,*petsc=mypetsc; 521*1447629fSBarry Smith 522*1447629fSBarry Smith PetscFunctionBegin; 523*1447629fSBarry Smith ierr = ISCreateGeneral(comm,napp,app,PETSC_USE_POINTER,&isapp);CHKERRQ(ierr); 524*1447629fSBarry Smith if (mypetsc) { 525*1447629fSBarry Smith ierr = ISCreateGeneral(comm,napp,petsc,PETSC_USE_POINTER,&ispetsc);CHKERRQ(ierr); 526*1447629fSBarry Smith } else { 527*1447629fSBarry Smith ispetsc = NULL; 528*1447629fSBarry Smith } 529*1447629fSBarry Smith ierr = AOCreateMemoryScalableIS(isapp,ispetsc,aoout);CHKERRQ(ierr); 530*1447629fSBarry Smith ierr = ISDestroy(&isapp);CHKERRQ(ierr); 531*1447629fSBarry Smith if (mypetsc) { 532*1447629fSBarry Smith ierr = ISDestroy(&ispetsc);CHKERRQ(ierr); 533*1447629fSBarry Smith } 534*1447629fSBarry Smith PetscFunctionReturn(0); 535*1447629fSBarry Smith } 536*1447629fSBarry Smith 537*1447629fSBarry Smith #undef __FUNCT__ 538*1447629fSBarry Smith #define __FUNCT__ "AOCreateMemoryScalableIS" 539*1447629fSBarry Smith /*@C 540*1447629fSBarry Smith AOCreateMemoryScalableIS - Creates a memory scalable application ordering using two index sets. 541*1447629fSBarry Smith 542*1447629fSBarry Smith Collective on IS 543*1447629fSBarry Smith 544*1447629fSBarry Smith Input Parameters: 545*1447629fSBarry Smith + isapp - index set that defines an ordering 546*1447629fSBarry Smith - ispetsc - index set that defines another ordering (may be NULL to use the 547*1447629fSBarry Smith natural ordering) 548*1447629fSBarry Smith 549*1447629fSBarry Smith Output Parameter: 550*1447629fSBarry Smith . aoout - the new application ordering 551*1447629fSBarry Smith 552*1447629fSBarry Smith Level: beginner 553*1447629fSBarry Smith 554*1447629fSBarry Smith Notes: The index sets isapp and ispetsc must contain the all the integers 0 to napp-1 (where napp is the length of the index sets) with no duplicates; 555*1447629fSBarry Smith that is there cannot be any "holes". 556*1447629fSBarry Smith Comparing with AOCreateBasicIS(), this routine trades memory with message communication. 557*1447629fSBarry Smith .keywords: AO, create 558*1447629fSBarry Smith 559*1447629fSBarry Smith .seealso: AOCreateMemoryScalable(), AODestroy() 560*1447629fSBarry Smith @*/ 561*1447629fSBarry Smith PetscErrorCode AOCreateMemoryScalableIS(IS isapp,IS ispetsc,AO *aoout) 562*1447629fSBarry Smith { 563*1447629fSBarry Smith PetscErrorCode ierr; 564*1447629fSBarry Smith MPI_Comm comm; 565*1447629fSBarry Smith AO ao; 566*1447629fSBarry Smith 567*1447629fSBarry Smith PetscFunctionBegin; 568*1447629fSBarry Smith ierr = PetscObjectGetComm((PetscObject)isapp,&comm);CHKERRQ(ierr); 569*1447629fSBarry Smith ierr = AOCreate(comm,&ao);CHKERRQ(ierr); 570*1447629fSBarry Smith ierr = AOSetIS(ao,isapp,ispetsc);CHKERRQ(ierr); 571*1447629fSBarry Smith ierr = AOSetType(ao,AOMEMORYSCALABLE);CHKERRQ(ierr); 572*1447629fSBarry Smith *aoout = ao; 573*1447629fSBarry Smith PetscFunctionReturn(0); 574*1447629fSBarry Smith } 575