1*095059a4SDave May /* 2*095059a4SDave May Build a few basic tools to help with partitioned domains. 3*095059a4SDave May 4*095059a4SDave May 1) 5*095059a4SDave May On each processor, have a DomainExchangerTopology. 6*095059a4SDave May This is a doubly-connected edge list which enumerates the 7*095059a4SDave May communication paths between connected processors. By numbering 8*095059a4SDave May these paths we can always uniquely assign message identifers. 9*095059a4SDave May 10*095059a4SDave May edge 11*095059a4SDave May 10 12*095059a4SDave May proc ---------> proc 13*095059a4SDave May 0 <-------- 1 14*095059a4SDave May 11 15*095059a4SDave May twin 16*095059a4SDave May 17*095059a4SDave May Eg: Proc 0 send to proc 1 with message id is 10. To recieve the correct 18*095059a4SDave May message, proc 1 looks for the edge connected to proc 0, and then the 19*095059a4SDave May messgae id comes from the twin of that edge 20*095059a4SDave May 21*095059a4SDave May 2) 22*095059a4SDave May A DomainExchangerArrayPacker. 23*095059a4SDave May A little function which given a piece of data, will memcpy the data into 24*095059a4SDave May an array (which will be sent to procs) into the correct place. 25*095059a4SDave May 26*095059a4SDave May On Proc 1 we sent data to procs 0,2,3. The data is on different lengths. 27*095059a4SDave May All data gets jammed into single array. Need to "jam" data into correct locations 28*095059a4SDave May The Packer knows how much is to going to each processor and keeps track of the inserts 29*095059a4SDave May so as to avoid ever packing TOO much into one slot, and inevatbly corrupting some memory 30*095059a4SDave May 31*095059a4SDave May data to 0 data to 2 data to 3 32*095059a4SDave May 33*095059a4SDave May |--------|-----------------|--| 34*095059a4SDave May 35*095059a4SDave May 36*095059a4SDave May User has to unpack message themselves. I can get you the pointer for each i 37*095059a4SDave May entry, but you'll have to cast it to the appropriate data type. 38*095059a4SDave May 39*095059a4SDave May 40*095059a4SDave May 41*095059a4SDave May 42*095059a4SDave May Phase A: Build topology 43*095059a4SDave May 44*095059a4SDave May Phase B: Define message lengths 45*095059a4SDave May 46*095059a4SDave May Phase C: Pack data 47*095059a4SDave May 48*095059a4SDave May Phase D: Send data 49*095059a4SDave May 50*095059a4SDave May // 51*095059a4SDave May DataExCreate() 52*095059a4SDave May // A 53*095059a4SDave May DataExTopologyInitialize() 54*095059a4SDave May DataExTopologyAddNeighbour() 55*095059a4SDave May DataExTopologyAddNeighbour() 56*095059a4SDave May DataExTopologyFinalize() 57*095059a4SDave May // B 58*095059a4SDave May DataExZeroAllSendCount() 59*095059a4SDave May DataExAddToSendCount() 60*095059a4SDave May DataExAddToSendCount() 61*095059a4SDave May DataExAddToSendCount() 62*095059a4SDave May // C 63*095059a4SDave May DataExPackInitialize() 64*095059a4SDave May DataExPackData() 65*095059a4SDave May DataExPackData() 66*095059a4SDave May DataExPackFinalize() 67*095059a4SDave May // D 68*095059a4SDave May DataExBegin() 69*095059a4SDave May // ... perform any calculations ... /// 70*095059a4SDave May DataExEnd() 71*095059a4SDave May 72*095059a4SDave May // Call any getters // 73*095059a4SDave May 74*095059a4SDave May 75*095059a4SDave May */ 76*095059a4SDave May 77*095059a4SDave May #include <petsc.h> 78*095059a4SDave May #include <petscvec.h> 79*095059a4SDave May #include <petscmat.h> 80*095059a4SDave May 81*095059a4SDave May #include "data_ex.h" 82*095059a4SDave May 83*095059a4SDave May const char *status_names[] = { "initialized", "finalized", "unknown" }; 84*095059a4SDave May 85*095059a4SDave May PetscLogEvent PTATIN_DataExchangerTopologySetup; 86*095059a4SDave May PetscLogEvent PTATIN_DataExchangerBegin; 87*095059a4SDave May PetscLogEvent PTATIN_DataExchangerEnd; 88*095059a4SDave May 89*095059a4SDave May 90*095059a4SDave May #undef __FUNCT__ 91*095059a4SDave May #define __FUNCT__ "DataExCreate" 92*095059a4SDave May DataEx DataExCreate(MPI_Comm comm,const PetscInt count) 93*095059a4SDave May { 94*095059a4SDave May DataEx d; 95*095059a4SDave May PetscErrorCode ierr; 96*095059a4SDave May 97*095059a4SDave May d = (DataEx)malloc( sizeof(struct _p_DataEx) ); 98*095059a4SDave May memset( d, 0, sizeof(struct _p_DataEx) ); 99*095059a4SDave May 100*095059a4SDave May ierr = MPI_Comm_dup(comm,&d->comm); 101*095059a4SDave May ierr = MPI_Comm_rank(d->comm,&d->rank); 102*095059a4SDave May 103*095059a4SDave May d->instance = count; 104*095059a4SDave May 105*095059a4SDave May d->topology_status = DEOBJECT_STATE_UNKNOWN; 106*095059a4SDave May d->message_lengths_status = DEOBJECT_STATE_UNKNOWN; 107*095059a4SDave May d->packer_status = DEOBJECT_STATE_UNKNOWN; 108*095059a4SDave May d->communication_status = DEOBJECT_STATE_UNKNOWN; 109*095059a4SDave May 110*095059a4SDave May d->n_neighbour_procs = -1; 111*095059a4SDave May d->neighbour_procs = NULL; 112*095059a4SDave May 113*095059a4SDave May d->messages_to_be_sent = NULL; 114*095059a4SDave May d->message_offsets = NULL; 115*095059a4SDave May d->messages_to_be_recvieved = NULL; 116*095059a4SDave May 117*095059a4SDave May d->unit_message_size = -1; 118*095059a4SDave May d->send_message = NULL; 119*095059a4SDave May d->send_message_length = -1; 120*095059a4SDave May d->recv_message = NULL; 121*095059a4SDave May d->recv_message_length = -1; 122*095059a4SDave May d->total_pack_cnt = -1; 123*095059a4SDave May d->pack_cnt = NULL; 124*095059a4SDave May 125*095059a4SDave May d->send_tags = NULL; 126*095059a4SDave May d->recv_tags = NULL; 127*095059a4SDave May 128*095059a4SDave May d->_stats = NULL; 129*095059a4SDave May d->_requests = NULL; 130*095059a4SDave May 131*095059a4SDave May return d; 132*095059a4SDave May } 133*095059a4SDave May 134*095059a4SDave May #undef __FUNCT__ 135*095059a4SDave May #define __FUNCT__ "DataExView" 136*095059a4SDave May PetscErrorCode DataExView(DataEx d) 137*095059a4SDave May { 138*095059a4SDave May PetscMPIInt p; 139*095059a4SDave May 140*095059a4SDave May 141*095059a4SDave May PetscFunctionBegin; 142*095059a4SDave May PetscPrintf( PETSC_COMM_WORLD, "DataEx: instance=%D\n",d->instance); 143*095059a4SDave May 144*095059a4SDave May PetscPrintf( PETSC_COMM_WORLD, " topology status: %s \n", status_names[d->topology_status]); 145*095059a4SDave May PetscPrintf( PETSC_COMM_WORLD, " message lengths status: %s \n", status_names[d->message_lengths_status] ); 146*095059a4SDave May PetscPrintf( PETSC_COMM_WORLD, " packer status status: %s \n", status_names[d->packer_status] ); 147*095059a4SDave May PetscPrintf( PETSC_COMM_WORLD, " communication status: %s \n", status_names[d->communication_status] ); 148*095059a4SDave May 149*095059a4SDave May if (d->topology_status == DEOBJECT_FINALIZED) { 150*095059a4SDave May PetscPrintf( PETSC_COMM_WORLD, " Topology:\n"); 151*095059a4SDave May PetscPrintf( PETSC_COMM_SELF, " [%d] neighbours: %d \n", (int)d->rank, (int)d->n_neighbour_procs ); 152*095059a4SDave May for (p=0; p<d->n_neighbour_procs; p++) { 153*095059a4SDave May PetscPrintf( PETSC_COMM_SELF, " [%d] neighbour[%D] = %d \n", (int)d->rank, p, (int)d->neighbour_procs[p]); 154*095059a4SDave May } 155*095059a4SDave May } 156*095059a4SDave May 157*095059a4SDave May if (d->message_lengths_status == DEOBJECT_FINALIZED) { 158*095059a4SDave May PetscPrintf( PETSC_COMM_WORLD, " Message lengths:\n"); 159*095059a4SDave May PetscPrintf( PETSC_COMM_SELF, " [%d] atomic size: %ld \n", (int)d->rank, (long int)d->unit_message_size ); 160*095059a4SDave May for (p=0; p<d->n_neighbour_procs; p++) { 161*095059a4SDave May PetscPrintf( PETSC_COMM_SELF, " [%d] >>>>> ( %D units :: tag = %d ) >>>>> [%d] \n", (int)d->rank, d->messages_to_be_sent[p], d->send_tags[p], (int)d->neighbour_procs[p] ); 162*095059a4SDave May } 163*095059a4SDave May for (p=0; p<d->n_neighbour_procs; p++) { 164*095059a4SDave May PetscPrintf( PETSC_COMM_SELF, " [%d] <<<<< ( %D units :: tag = %d ) <<<<< [%d] \n", (int)d->rank, d->messages_to_be_recvieved[p], d->recv_tags[p], (int)d->neighbour_procs[p] ); 165*095059a4SDave May } 166*095059a4SDave May } 167*095059a4SDave May 168*095059a4SDave May if (d->packer_status == DEOBJECT_FINALIZED) { 169*095059a4SDave May 170*095059a4SDave May } 171*095059a4SDave May 172*095059a4SDave May if (d->communication_status == DEOBJECT_FINALIZED) { 173*095059a4SDave May 174*095059a4SDave May } 175*095059a4SDave May 176*095059a4SDave May PetscFunctionReturn(0); 177*095059a4SDave May } 178*095059a4SDave May 179*095059a4SDave May #undef __FUNCT__ 180*095059a4SDave May #define __FUNCT__ "DataExDestroy" 181*095059a4SDave May PetscErrorCode DataExDestroy(DataEx d) 182*095059a4SDave May { 183*095059a4SDave May PetscErrorCode ierr; 184*095059a4SDave May 185*095059a4SDave May PetscFunctionBegin; 186*095059a4SDave May ierr = MPI_Comm_free(&d->comm);CHKERRQ(ierr); 187*095059a4SDave May 188*095059a4SDave May if (d->neighbour_procs != NULL) { 189*095059a4SDave May free(d->neighbour_procs); 190*095059a4SDave May } 191*095059a4SDave May 192*095059a4SDave May if (d->messages_to_be_sent != NULL) { 193*095059a4SDave May free(d->messages_to_be_sent); 194*095059a4SDave May } 195*095059a4SDave May 196*095059a4SDave May if (d->message_offsets != NULL) { 197*095059a4SDave May free(d->message_offsets); 198*095059a4SDave May } 199*095059a4SDave May 200*095059a4SDave May if (d->messages_to_be_recvieved != NULL) { 201*095059a4SDave May free(d->messages_to_be_recvieved); 202*095059a4SDave May } 203*095059a4SDave May 204*095059a4SDave May if (d->send_message != NULL) { 205*095059a4SDave May free(d->send_message); 206*095059a4SDave May } 207*095059a4SDave May 208*095059a4SDave May if (d->recv_message != NULL) { 209*095059a4SDave May free(d->recv_message); 210*095059a4SDave May } 211*095059a4SDave May 212*095059a4SDave May if (d->pack_cnt != NULL) { 213*095059a4SDave May free(d->pack_cnt); 214*095059a4SDave May } 215*095059a4SDave May 216*095059a4SDave May if (d->send_tags != NULL) { 217*095059a4SDave May free(d->send_tags); 218*095059a4SDave May } 219*095059a4SDave May if (d->recv_tags != NULL) { 220*095059a4SDave May free(d->recv_tags); 221*095059a4SDave May } 222*095059a4SDave May 223*095059a4SDave May if (d->_stats != NULL) { 224*095059a4SDave May free(d->_stats); 225*095059a4SDave May } 226*095059a4SDave May if (d->_requests != NULL) { 227*095059a4SDave May free(d->_requests); 228*095059a4SDave May } 229*095059a4SDave May 230*095059a4SDave May free(d); 231*095059a4SDave May 232*095059a4SDave May PetscFunctionReturn(0); 233*095059a4SDave May } 234*095059a4SDave May 235*095059a4SDave May /* === Phase A === */ 236*095059a4SDave May 237*095059a4SDave May #undef __FUNCT__ 238*095059a4SDave May #define __FUNCT__ "DataExTopologyInitialize" 239*095059a4SDave May PetscErrorCode DataExTopologyInitialize(DataEx d) 240*095059a4SDave May { 241*095059a4SDave May PetscFunctionBegin; 242*095059a4SDave May d->topology_status = DEOBJECT_INITIALIZED; 243*095059a4SDave May 244*095059a4SDave May d->n_neighbour_procs = 0; 245*095059a4SDave May if (d->neighbour_procs != NULL) { free(d->neighbour_procs); d->neighbour_procs = NULL; } 246*095059a4SDave May if (d->messages_to_be_sent != NULL) { free(d->messages_to_be_sent); d->messages_to_be_sent = NULL; } 247*095059a4SDave May if (d->message_offsets != NULL) { free(d->message_offsets); d->message_offsets = NULL; } 248*095059a4SDave May if (d->messages_to_be_recvieved != NULL) { free(d->messages_to_be_recvieved); d->messages_to_be_recvieved = NULL; } 249*095059a4SDave May if (d->pack_cnt != NULL) { free(d->pack_cnt); d->pack_cnt = NULL; } 250*095059a4SDave May 251*095059a4SDave May if (d->send_tags != NULL) { free(d->send_tags); d->send_tags = NULL; } 252*095059a4SDave May if (d->recv_tags != NULL) { free(d->recv_tags); d->recv_tags = NULL; } 253*095059a4SDave May 254*095059a4SDave May PetscFunctionReturn(0); 255*095059a4SDave May } 256*095059a4SDave May 257*095059a4SDave May #undef __FUNCT__ 258*095059a4SDave May #define __FUNCT__ "DataExTopologyAddNeighbour" 259*095059a4SDave May PetscErrorCode DataExTopologyAddNeighbour(DataEx d,const PetscMPIInt proc_id) 260*095059a4SDave May { 261*095059a4SDave May PetscMPIInt n,found; 262*095059a4SDave May PetscMPIInt nproc; 263*095059a4SDave May PetscErrorCode ierr; 264*095059a4SDave May 265*095059a4SDave May 266*095059a4SDave May PetscFunctionBegin; 267*095059a4SDave May if (d->topology_status == DEOBJECT_FINALIZED) { 268*095059a4SDave May SETERRQ( d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology has been finalized. To modify or update call DataExTopologyInitialize() first" ); 269*095059a4SDave May } 270*095059a4SDave May else if (d->topology_status != DEOBJECT_INITIALIZED) { 271*095059a4SDave May SETERRQ( d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be intialised. Call DataExTopologyInitialize() first" ); 272*095059a4SDave May } 273*095059a4SDave May 274*095059a4SDave May /* error on negative entries */ 275*095059a4SDave May if (proc_id < 0) { 276*095059a4SDave May SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Trying to set proc neighbour with a rank < 0"); 277*095059a4SDave May } 278*095059a4SDave May /* error on ranks larger than number of procs in communicator */ 279*095059a4SDave May ierr = MPI_Comm_size(d->comm,&nproc);CHKERRQ(ierr); 280*095059a4SDave May if (proc_id >= nproc) { 281*095059a4SDave May SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Trying to set proc neighbour with a rank >= nproc"); 282*095059a4SDave May } 283*095059a4SDave May 284*095059a4SDave May if (d->n_neighbour_procs == 0) { 285*095059a4SDave May d->neighbour_procs = (PetscMPIInt*)malloc( sizeof(PetscMPIInt) ); 286*095059a4SDave May } 287*095059a4SDave May 288*095059a4SDave May /* check for proc_id */ 289*095059a4SDave May found = 0; 290*095059a4SDave May for (n=0; n<d->n_neighbour_procs; n++) { 291*095059a4SDave May if (d->neighbour_procs[n] == proc_id) { 292*095059a4SDave May found = 1; 293*095059a4SDave May } 294*095059a4SDave May } 295*095059a4SDave May if (found == 0) { /* add it to list */ 296*095059a4SDave May PetscMPIInt *tmp; 297*095059a4SDave May 298*095059a4SDave May tmp = (PetscMPIInt*)realloc( d->neighbour_procs, sizeof(PetscMPIInt)*(d->n_neighbour_procs+1) ); 299*095059a4SDave May d->neighbour_procs = tmp; 300*095059a4SDave May 301*095059a4SDave May d->neighbour_procs[ d->n_neighbour_procs ] = proc_id; 302*095059a4SDave May d->n_neighbour_procs++; 303*095059a4SDave May } 304*095059a4SDave May 305*095059a4SDave May PetscFunctionReturn(0); 306*095059a4SDave May } 307*095059a4SDave May 308*095059a4SDave May /* 309*095059a4SDave May counter: the index of the communication object 310*095059a4SDave May N: the number of processors 311*095059a4SDave May r0: rank of sender 312*095059a4SDave May r1: rank of receiver 313*095059a4SDave May 314*095059a4SDave May procs = { 0, 1, 2, 3 } 315*095059a4SDave May 316*095059a4SDave May 0 ==> 0 e=0 317*095059a4SDave May 0 ==> 1 e=1 318*095059a4SDave May 0 ==> 2 e=2 319*095059a4SDave May 0 ==> 3 e=3 320*095059a4SDave May 321*095059a4SDave May 1 ==> 0 e=4 322*095059a4SDave May 1 ==> 1 e=5 323*095059a4SDave May 1 ==> 2 e=6 324*095059a4SDave May 1 ==> 3 e=7 325*095059a4SDave May 326*095059a4SDave May 2 ==> 0 e=8 327*095059a4SDave May 2 ==> 1 e=9 328*095059a4SDave May 2 ==> 2 e=10 329*095059a4SDave May 2 ==> 3 e=11 330*095059a4SDave May 331*095059a4SDave May 3 ==> 0 e=12 332*095059a4SDave May 3 ==> 1 e=13 333*095059a4SDave May 3 ==> 2 e=14 334*095059a4SDave May 3 ==> 3 e=15 335*095059a4SDave May 336*095059a4SDave May If we require that proc A sends to proc B, then the SEND tag index will be given by 337*095059a4SDave May N * rank(A) + rank(B) + offset 338*095059a4SDave May If we require that proc A will receive from proc B, then the RECV tag index will be given by 339*095059a4SDave May N * rank(B) + rank(A) + offset 340*095059a4SDave May 341*095059a4SDave May */ 342*095059a4SDave May void _get_tags( PetscInt counter, PetscMPIInt N, PetscMPIInt r0,PetscMPIInt r1, PetscMPIInt *_st, PetscMPIInt *_rt ) 343*095059a4SDave May { 344*095059a4SDave May PetscMPIInt st,rt; 345*095059a4SDave May 346*095059a4SDave May 347*095059a4SDave May st = N*r0 + r1 + N*N*counter; 348*095059a4SDave May rt = N*r1 + r0 + N*N*counter; 349*095059a4SDave May 350*095059a4SDave May *_st = st; 351*095059a4SDave May *_rt = rt; 352*095059a4SDave May } 353*095059a4SDave May 354*095059a4SDave May /* 355*095059a4SDave May Makes the communication map symmetric 356*095059a4SDave May */ 357*095059a4SDave May #undef __FUNCT__ 358*095059a4SDave May #define __FUNCT__ "_DataExCompleteCommunicationMap" 359*095059a4SDave May PetscErrorCode _DataExCompleteCommunicationMap(MPI_Comm comm,PetscMPIInt n,PetscMPIInt proc_neighbours[],PetscMPIInt *n_new,PetscMPIInt **proc_neighbours_new) 360*095059a4SDave May { 361*095059a4SDave May Mat A,redA; 362*095059a4SDave May PetscInt i,j,nc; 363*095059a4SDave May PetscInt n_, *proc_neighbours_; 364*095059a4SDave May PetscInt rank_i_; 365*095059a4SDave May PetscMPIInt size, rank_i; 366*095059a4SDave May PetscInt max_nnz; 367*095059a4SDave May PetscScalar *vals; 368*095059a4SDave May const PetscInt *cols; 369*095059a4SDave May const PetscScalar *red_vals; 370*095059a4SDave May PetscMPIInt _n_new, *_proc_neighbours_new; 371*095059a4SDave May PetscBool is_seqaij; 372*095059a4SDave May PetscLogDouble t0,t1; 373*095059a4SDave May PetscErrorCode ierr; 374*095059a4SDave May 375*095059a4SDave May 376*095059a4SDave May PetscFunctionBegin; 377*095059a4SDave May PetscPrintf(PETSC_COMM_WORLD,"************************** Starting _DataExCompleteCommunicationMap ************************** \n"); 378*095059a4SDave May PetscTime(&t0); 379*095059a4SDave May 380*095059a4SDave May n_ = n; 381*095059a4SDave May ierr = PetscMalloc( sizeof(PetscInt) * n_, &proc_neighbours_ );CHKERRQ(ierr); 382*095059a4SDave May for (i=0; i<n_; i++) { 383*095059a4SDave May proc_neighbours_[i] = proc_neighbours[i]; 384*095059a4SDave May } 385*095059a4SDave May 386*095059a4SDave May ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 387*095059a4SDave May ierr = MPI_Comm_rank(comm,&rank_i);CHKERRQ(ierr); 388*095059a4SDave May rank_i_ = rank_i; 389*095059a4SDave May 390*095059a4SDave May ierr = MatCreate(comm,&A);CHKERRQ(ierr); 391*095059a4SDave May ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,size,size);CHKERRQ(ierr); 392*095059a4SDave May ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr); 393*095059a4SDave May 394*095059a4SDave May ierr = MPI_Allreduce(&n_,&max_nnz,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 395*095059a4SDave May ierr = MatSeqAIJSetPreallocation(A,n_,NULL);CHKERRQ(ierr); 396*095059a4SDave May PetscPrintf(PETSC_COMM_WORLD,"max_nnz = %D \n", max_nnz ); 397*095059a4SDave May //printf("[%d]: nnz = %d \n", rank_i,n_ ); 398*095059a4SDave May { 399*095059a4SDave May ierr = MatMPIAIJSetPreallocation(A,1,NULL,n_,NULL);CHKERRQ(ierr); 400*095059a4SDave May } 401*095059a4SDave May 402*095059a4SDave May 403*095059a4SDave May /* Build original map */ 404*095059a4SDave May ierr = PetscMalloc( sizeof(PetscScalar)*n_, &vals );CHKERRQ(ierr); 405*095059a4SDave May for (i=0; i<n_; i++) { 406*095059a4SDave May vals[i] = 1.0; 407*095059a4SDave May } 408*095059a4SDave May ierr = MatSetValues( A, 1,&rank_i_, n_,proc_neighbours_, vals, INSERT_VALUES );CHKERRQ(ierr); 409*095059a4SDave May 410*095059a4SDave May ierr = MatAssemblyBegin(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 411*095059a4SDave May ierr = MatAssemblyEnd(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 412*095059a4SDave May 413*095059a4SDave May /* Now force all other connections if they are not already there */ 414*095059a4SDave May /* It's more efficient to do them all at once */ 415*095059a4SDave May for (i=0; i<n_; i++) { 416*095059a4SDave May vals[i] = 2.0; 417*095059a4SDave May } 418*095059a4SDave May ierr = MatSetValues( A, n_,proc_neighbours_, 1,&rank_i_, vals, INSERT_VALUES );CHKERRQ(ierr); 419*095059a4SDave May 420*095059a4SDave May ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 421*095059a4SDave May ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 422*095059a4SDave May 423*095059a4SDave May ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr); 424*095059a4SDave May ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 425*095059a4SDave May ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 426*095059a4SDave May 427*095059a4SDave May /* 428*095059a4SDave May What the fuck is this? Is this necessary at all? 429*095059a4SDave May Why cannot I just use MatGetRow on the single row living on the current rank?? 430*095059a4SDave May Seems like a fine thing to do provided the operation MatGetRow() is supported for MATMPIAIJ 431*095059a4SDave May ~ DAM, Feb 26, 2013 (using petsc 3.2) 432*095059a4SDave May */ 433*095059a4SDave May /* Duplicate the entire matrix on ALL cpu's */ 434*095059a4SDave May /* 435*095059a4SDave May MatGetRedundantMatrix is not supported for SEQAIJ, thus we 436*095059a4SDave May fake a redundant matrix by setting equal to A. This enables the 437*095059a4SDave May code to run on one cpu (even if this seems slightly odd). 438*095059a4SDave May */ 439*095059a4SDave May is_seqaij = PETSC_FALSE; 440*095059a4SDave May ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&is_seqaij);CHKERRQ(ierr); 441*095059a4SDave May // if (is_seqaij==PETSC_FALSE) { 442*095059a4SDave May // ierr = MatGetRedundantMatrix( A, size_, PETSC_COMM_SELF, size_, MAT_INITIAL_MATRIX, &redA );CHKERRQ(ierr); 443*095059a4SDave May // } else { 444*095059a4SDave May redA = A; 445*095059a4SDave May ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 446*095059a4SDave May // } 447*095059a4SDave May 448*095059a4SDave May if ((n_new != NULL) && (proc_neighbours_new != NULL)) { 449*095059a4SDave May 450*095059a4SDave May ierr = MatGetRow( redA, rank_i_, &nc, &cols, &red_vals );CHKERRQ(ierr); 451*095059a4SDave May 452*095059a4SDave May _n_new = (PetscMPIInt)nc; 453*095059a4SDave May _proc_neighbours_new = (PetscMPIInt*)malloc( sizeof(PetscMPIInt) * _n_new ); 454*095059a4SDave May 455*095059a4SDave May for (j=0; j<nc; j++) { 456*095059a4SDave May _proc_neighbours_new[j] = (PetscMPIInt)cols[j]; 457*095059a4SDave May } 458*095059a4SDave May ierr = MatRestoreRow( redA, rank_i_, &nc, &cols, &red_vals );CHKERRQ(ierr); 459*095059a4SDave May 460*095059a4SDave May *n_new = (PetscMPIInt)_n_new; 461*095059a4SDave May *proc_neighbours_new = (PetscMPIInt*)_proc_neighbours_new; 462*095059a4SDave May } 463*095059a4SDave May 464*095059a4SDave May ierr = MatDestroy(&redA);CHKERRQ(ierr); 465*095059a4SDave May ierr = MatDestroy(&A);CHKERRQ(ierr); 466*095059a4SDave May ierr = PetscFree(vals);CHKERRQ(ierr); 467*095059a4SDave May ierr = PetscFree(proc_neighbours_);CHKERRQ(ierr); 468*095059a4SDave May 469*095059a4SDave May ierr = MPI_Barrier(comm);CHKERRQ(ierr); 470*095059a4SDave May PetscTime(&t1); 471*095059a4SDave May PetscPrintf(PETSC_COMM_WORLD,"************************** Ending _DataExCompleteCommunicationMap [setup time: %1.4e (sec)] ************************** \n",t1-t0); 472*095059a4SDave May 473*095059a4SDave May PetscFunctionReturn(0); 474*095059a4SDave May } 475*095059a4SDave May 476*095059a4SDave May #undef __FUNCT__ 477*095059a4SDave May #define __FUNCT__ "DataExTopologyFinalize" 478*095059a4SDave May PetscErrorCode DataExTopologyFinalize(DataEx d) 479*095059a4SDave May { 480*095059a4SDave May PetscMPIInt symm_nn; 481*095059a4SDave May PetscMPIInt *symm_procs; 482*095059a4SDave May PetscMPIInt r0,n,st,rt; 483*095059a4SDave May PetscMPIInt nprocs; 484*095059a4SDave May PetscErrorCode ierr; 485*095059a4SDave May 486*095059a4SDave May 487*095059a4SDave May PetscFunctionBegin; 488*095059a4SDave May if (d->topology_status != DEOBJECT_INITIALIZED) { 489*095059a4SDave May SETERRQ( d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be intialised. Call DataExTopologyInitialize() first" ); 490*095059a4SDave May } 491*095059a4SDave May ierr = PetscLogEventBegin(PTATIN_DataExchangerTopologySetup,0,0,0,0);CHKERRQ(ierr); 492*095059a4SDave May 493*095059a4SDave May /* given infomation about all my neighbours, make map symmetric */ 494*095059a4SDave May ierr = _DataExCompleteCommunicationMap( d->comm,d->n_neighbour_procs,d->neighbour_procs, &symm_nn, &symm_procs );CHKERRQ(ierr); 495*095059a4SDave May /* update my arrays */ 496*095059a4SDave May free(d->neighbour_procs); 497*095059a4SDave May 498*095059a4SDave May d->n_neighbour_procs = symm_nn; 499*095059a4SDave May d->neighbour_procs = symm_procs; 500*095059a4SDave May 501*095059a4SDave May 502*095059a4SDave May /* allocates memory */ 503*095059a4SDave May if (d->messages_to_be_sent == NULL) { 504*095059a4SDave May d->messages_to_be_sent = (PetscInt*)malloc( sizeof(PetscInt) * d->n_neighbour_procs ); 505*095059a4SDave May } 506*095059a4SDave May if (d->message_offsets == NULL) { 507*095059a4SDave May d->message_offsets = (PetscInt*)malloc( sizeof(PetscInt) * d->n_neighbour_procs ); 508*095059a4SDave May } 509*095059a4SDave May if (d->messages_to_be_recvieved == NULL) { 510*095059a4SDave May d->messages_to_be_recvieved = (PetscInt*)malloc( sizeof(PetscInt) * d->n_neighbour_procs ); 511*095059a4SDave May } 512*095059a4SDave May 513*095059a4SDave May if (d->pack_cnt == NULL) { 514*095059a4SDave May d->pack_cnt = (PetscInt*)malloc( sizeof(PetscInt) * d->n_neighbour_procs ); 515*095059a4SDave May } 516*095059a4SDave May 517*095059a4SDave May if (d->_stats == NULL) { 518*095059a4SDave May d->_stats = (MPI_Status*)malloc( sizeof(MPI_Status) * 2*d->n_neighbour_procs ); 519*095059a4SDave May } 520*095059a4SDave May if (d->_requests == NULL) { 521*095059a4SDave May d->_requests = (MPI_Request*)malloc( sizeof(MPI_Request) * 2*d->n_neighbour_procs ); 522*095059a4SDave May } 523*095059a4SDave May 524*095059a4SDave May if (d->send_tags == NULL) { 525*095059a4SDave May d->send_tags = (int*)malloc( sizeof(int) * d->n_neighbour_procs ); 526*095059a4SDave May } 527*095059a4SDave May if (d->recv_tags == NULL) { 528*095059a4SDave May d->recv_tags = (int*)malloc( sizeof(int) * d->n_neighbour_procs ); 529*095059a4SDave May } 530*095059a4SDave May 531*095059a4SDave May /* compute message tags */ 532*095059a4SDave May ierr = MPI_Comm_size(d->comm,&nprocs);CHKERRQ(ierr); 533*095059a4SDave May r0 = d->rank; 534*095059a4SDave May for (n=0; n<d->n_neighbour_procs; n++) { 535*095059a4SDave May PetscMPIInt r1 = d->neighbour_procs[n]; 536*095059a4SDave May 537*095059a4SDave May _get_tags( d->instance, nprocs, r0,r1, &st, &rt ); 538*095059a4SDave May 539*095059a4SDave May d->send_tags[n] = (int)st; 540*095059a4SDave May d->recv_tags[n] = (int)rt; 541*095059a4SDave May } 542*095059a4SDave May 543*095059a4SDave May d->topology_status = DEOBJECT_FINALIZED; 544*095059a4SDave May ierr = PetscLogEventEnd(PTATIN_DataExchangerTopologySetup,0,0,0,0);CHKERRQ(ierr); 545*095059a4SDave May 546*095059a4SDave May PetscFunctionReturn(0); 547*095059a4SDave May } 548*095059a4SDave May 549*095059a4SDave May /* === Phase B === */ 550*095059a4SDave May #undef __FUNCT__ 551*095059a4SDave May #define __FUNCT__ "_DataExConvertProcIdToLocalIndex" 552*095059a4SDave May PetscErrorCode _DataExConvertProcIdToLocalIndex(DataEx de,PetscMPIInt proc_id,PetscMPIInt *local) 553*095059a4SDave May { 554*095059a4SDave May PetscMPIInt i,np; 555*095059a4SDave May 556*095059a4SDave May 557*095059a4SDave May PetscFunctionBegin; 558*095059a4SDave May np = de->n_neighbour_procs; 559*095059a4SDave May 560*095059a4SDave May *local = -1; 561*095059a4SDave May for (i=0; i<np; i++) { 562*095059a4SDave May if (proc_id == de->neighbour_procs[i]) { 563*095059a4SDave May *local = i; 564*095059a4SDave May break; 565*095059a4SDave May } 566*095059a4SDave May } 567*095059a4SDave May PetscFunctionReturn(0); 568*095059a4SDave May } 569*095059a4SDave May 570*095059a4SDave May #undef __FUNCT__ 571*095059a4SDave May #define __FUNCT__ "DataExInitializeSendCount" 572*095059a4SDave May PetscErrorCode DataExInitializeSendCount(DataEx de) 573*095059a4SDave May { 574*095059a4SDave May PetscMPIInt i; 575*095059a4SDave May 576*095059a4SDave May 577*095059a4SDave May PetscFunctionBegin; 578*095059a4SDave May if (de->topology_status != DEOBJECT_FINALIZED) { 579*095059a4SDave May SETERRQ( de->comm, PETSC_ERR_ORDER, "Topology not finalized" ); 580*095059a4SDave May } 581*095059a4SDave May 582*095059a4SDave May de->message_lengths_status = DEOBJECT_INITIALIZED; 583*095059a4SDave May 584*095059a4SDave May for (i=0; i<de->n_neighbour_procs; i++) { 585*095059a4SDave May de->messages_to_be_sent[i] = 0; 586*095059a4SDave May } 587*095059a4SDave May 588*095059a4SDave May PetscFunctionReturn(0); 589*095059a4SDave May } 590*095059a4SDave May 591*095059a4SDave May /* 592*095059a4SDave May 1) only allows counters to be set on neighbouring cpus 593*095059a4SDave May */ 594*095059a4SDave May #undef __FUNCT__ 595*095059a4SDave May #define __FUNCT__ "DataExAddToSendCount" 596*095059a4SDave May PetscErrorCode DataExAddToSendCount(DataEx de,const PetscMPIInt proc_id,const PetscInt count) 597*095059a4SDave May { 598*095059a4SDave May PetscMPIInt local_val; 599*095059a4SDave May PetscErrorCode ierr; 600*095059a4SDave May 601*095059a4SDave May 602*095059a4SDave May PetscFunctionBegin; 603*095059a4SDave May if (de->message_lengths_status == DEOBJECT_FINALIZED) { 604*095059a4SDave May SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths have been defined. To modify these call DataExInitializeSendCount() first" ); 605*095059a4SDave May } 606*095059a4SDave May else if (de->message_lengths_status != DEOBJECT_INITIALIZED) { 607*095059a4SDave May SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DataExInitializeSendCount() first" ); 608*095059a4SDave May } 609*095059a4SDave May 610*095059a4SDave May ierr = _DataExConvertProcIdToLocalIndex( de, proc_id, &local_val );CHKERRQ(ierr); 611*095059a4SDave May if (local_val == -1) { 612*095059a4SDave May SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG,"Proc %d is not a valid neighbour rank", (int)proc_id ); 613*095059a4SDave May } 614*095059a4SDave May 615*095059a4SDave May de->messages_to_be_sent[local_val] = de->messages_to_be_sent[local_val] + count; 616*095059a4SDave May PetscFunctionReturn(0); 617*095059a4SDave May } 618*095059a4SDave May 619*095059a4SDave May #undef __FUNCT__ 620*095059a4SDave May #define __FUNCT__ "DataExFinalizeSendCount" 621*095059a4SDave May PetscErrorCode DataExFinalizeSendCount(DataEx de) 622*095059a4SDave May { 623*095059a4SDave May PetscFunctionBegin; 624*095059a4SDave May if (de->message_lengths_status != DEOBJECT_INITIALIZED) { 625*095059a4SDave May SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DataExInitializeSendCount() first" ); 626*095059a4SDave May } 627*095059a4SDave May de->message_lengths_status = DEOBJECT_FINALIZED; 628*095059a4SDave May 629*095059a4SDave May PetscFunctionReturn(0); 630*095059a4SDave May } 631*095059a4SDave May 632*095059a4SDave May /* === Phase C === */ 633*095059a4SDave May /* 634*095059a4SDave May * zero out all send counts 635*095059a4SDave May * free send and recv buffers 636*095059a4SDave May * zeros out message length 637*095059a4SDave May * zeros out all counters 638*095059a4SDave May * zero out packed data counters 639*095059a4SDave May */ 640*095059a4SDave May #undef __FUNCT__ 641*095059a4SDave May #define __FUNCT__ "_DataExInitializeTmpStorage" 642*095059a4SDave May PetscErrorCode _DataExInitializeTmpStorage(DataEx de) 643*095059a4SDave May { 644*095059a4SDave May PetscMPIInt i,np; 645*095059a4SDave May 646*095059a4SDave May 647*095059a4SDave May PetscFunctionBegin; 648*095059a4SDave May if (de->n_neighbour_procs < 0) { 649*095059a4SDave May SETERRQ( PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of neighbour procs < 0"); 650*095059a4SDave May } 651*095059a4SDave May if (de->neighbour_procs == NULL) { 652*095059a4SDave May SETERRQ( PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "Neighbour proc list is NULL" ); 653*095059a4SDave May } 654*095059a4SDave May 655*095059a4SDave May np = de->n_neighbour_procs; 656*095059a4SDave May for (i=0; i<np; i++) { 657*095059a4SDave May /* de->messages_to_be_sent[i] = -1; */ 658*095059a4SDave May de->messages_to_be_recvieved[i] = -1; 659*095059a4SDave May } 660*095059a4SDave May 661*095059a4SDave May if (de->send_message != NULL) { 662*095059a4SDave May free(de->send_message); 663*095059a4SDave May de->send_message = NULL; 664*095059a4SDave May } 665*095059a4SDave May if (de->recv_message != NULL) { 666*095059a4SDave May free(de->recv_message); 667*095059a4SDave May de->recv_message = NULL; 668*095059a4SDave May } 669*095059a4SDave May 670*095059a4SDave May PetscFunctionReturn(0); 671*095059a4SDave May } 672*095059a4SDave May 673*095059a4SDave May /* 674*095059a4SDave May *) Zeros out pack data counters 675*095059a4SDave May *) Ensures mesaage length is set 676*095059a4SDave May *) Checks send counts properly initialized 677*095059a4SDave May *) allocates space for pack data 678*095059a4SDave May */ 679*095059a4SDave May #undef __FUNCT__ 680*095059a4SDave May #define __FUNCT__ "DataExPackInitialize" 681*095059a4SDave May PetscErrorCode DataExPackInitialize(DataEx de,size_t unit_message_size) 682*095059a4SDave May { 683*095059a4SDave May PetscMPIInt i,np; 684*095059a4SDave May PetscInt total; 685*095059a4SDave May PetscErrorCode ierr; 686*095059a4SDave May 687*095059a4SDave May 688*095059a4SDave May PetscFunctionBegin; 689*095059a4SDave May if (de->topology_status != DEOBJECT_FINALIZED) { 690*095059a4SDave May SETERRQ( de->comm, PETSC_ERR_ORDER, "Topology not finalized" ); 691*095059a4SDave May } 692*095059a4SDave May if (de->message_lengths_status != DEOBJECT_FINALIZED) { 693*095059a4SDave May SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths not finalized" ); 694*095059a4SDave May } 695*095059a4SDave May 696*095059a4SDave May de->packer_status = DEOBJECT_INITIALIZED; 697*095059a4SDave May 698*095059a4SDave May ierr = _DataExInitializeTmpStorage(de);CHKERRQ(ierr); 699*095059a4SDave May 700*095059a4SDave May np = de->n_neighbour_procs; 701*095059a4SDave May 702*095059a4SDave May de->unit_message_size = unit_message_size; 703*095059a4SDave May 704*095059a4SDave May total = 0; 705*095059a4SDave May for (i=0; i<np; i++) { 706*095059a4SDave May if (de->messages_to_be_sent[i] == -1) { 707*095059a4SDave May PetscMPIInt proc_neighour = de->neighbour_procs[i]; 708*095059a4SDave May SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ORDER, "Messages_to_be_sent[neighbour_proc=%d] is un-initialised. Call DataExSetSendCount() first", (int)proc_neighour ); 709*095059a4SDave May } 710*095059a4SDave May total = total + de->messages_to_be_sent[i]; 711*095059a4SDave May } 712*095059a4SDave May 713*095059a4SDave May /* create space for the data to be sent */ 714*095059a4SDave May de->send_message = (void*)malloc( unit_message_size * (total + 1) ); 715*095059a4SDave May /* initialize memory */ 716*095059a4SDave May memset( de->send_message, 0, unit_message_size * (total + 1) ); 717*095059a4SDave May /* set total items to send */ 718*095059a4SDave May de->send_message_length = total; 719*095059a4SDave May 720*095059a4SDave May de->message_offsets[0] = 0; 721*095059a4SDave May total = de->messages_to_be_sent[0]; 722*095059a4SDave May for (i=1; i<np; i++) { 723*095059a4SDave May de->message_offsets[i] = total; 724*095059a4SDave May total = total + de->messages_to_be_sent[i]; 725*095059a4SDave May } 726*095059a4SDave May 727*095059a4SDave May /* init the packer counters */ 728*095059a4SDave May de->total_pack_cnt = 0; 729*095059a4SDave May for (i=0; i<np; i++) { 730*095059a4SDave May de->pack_cnt[i] = 0; 731*095059a4SDave May } 732*095059a4SDave May 733*095059a4SDave May PetscFunctionReturn(0); 734*095059a4SDave May } 735*095059a4SDave May 736*095059a4SDave May /* 737*095059a4SDave May *) Ensures data gets been packed appropriately and no overlaps occur 738*095059a4SDave May */ 739*095059a4SDave May #undef __FUNCT__ 740*095059a4SDave May #define __FUNCT__ "DataExPackData" 741*095059a4SDave May PetscErrorCode DataExPackData(DataEx de,PetscMPIInt proc_id,PetscInt n,void *data) 742*095059a4SDave May { 743*095059a4SDave May PetscMPIInt local; 744*095059a4SDave May PetscInt insert_location; 745*095059a4SDave May void *dest; 746*095059a4SDave May PetscErrorCode ierr; 747*095059a4SDave May 748*095059a4SDave May 749*095059a4SDave May PetscFunctionBegin; 750*095059a4SDave May if (de->packer_status == DEOBJECT_FINALIZED) { 751*095059a4SDave May SETERRQ( de->comm, PETSC_ERR_ORDER, "Packed data have been defined. To modify these call DataExInitializeSendCount(), DataExAddToSendCount(), DataExPackInitialize() first" ); 752*095059a4SDave May } 753*095059a4SDave May else if (de->packer_status != DEOBJECT_INITIALIZED) { 754*095059a4SDave May SETERRQ( de->comm, PETSC_ERR_ORDER, "Packed data must be defined. Call DataExInitializeSendCount(), DataExAddToSendCount(), DataExPackInitialize() first" ); 755*095059a4SDave May } 756*095059a4SDave May 757*095059a4SDave May 758*095059a4SDave May if (de->send_message == NULL){ 759*095059a4SDave May SETERRQ( de->comm, PETSC_ERR_ORDER, "send_message is not initialized. Call DataExPackInitialize() first" ); 760*095059a4SDave May } 761*095059a4SDave May 762*095059a4SDave May 763*095059a4SDave May ierr = _DataExConvertProcIdToLocalIndex( de, proc_id, &local );CHKERRQ(ierr); 764*095059a4SDave May if (local == -1) { 765*095059a4SDave May SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "proc_id %d is not registered neighbour", (int)proc_id ); 766*095059a4SDave May } 767*095059a4SDave May 768*095059a4SDave May if (n+de->pack_cnt[local] > de->messages_to_be_sent[local]) { 769*095059a4SDave May SETERRQ3( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Trying to pack too many entries to be sent to proc %d. Space requested = %D: Attempt to insert %D", 770*095059a4SDave May (int)proc_id, de->messages_to_be_sent[local], n+de->pack_cnt[local] ); 771*095059a4SDave May 772*095059a4SDave May /* don't need this - the catch for too many messages will pick this up. Gives us more info though */ 773*095059a4SDave May if (de->packer_status == DEOBJECT_FINALIZED) { 774*095059a4SDave May SETERRQ( de->comm, PETSC_ERR_ARG_WRONG, "Cannot insert any more data. DataExPackFinalize() has been called." ); 775*095059a4SDave May } 776*095059a4SDave May } 777*095059a4SDave May 778*095059a4SDave May /* copy memory */ 779*095059a4SDave May insert_location = de->message_offsets[local] + de->pack_cnt[local]; 780*095059a4SDave May dest = ((char*)de->send_message) + de->unit_message_size*insert_location; 781*095059a4SDave May memcpy( dest, data, de->unit_message_size * n ); 782*095059a4SDave May 783*095059a4SDave May /* increment counter */ 784*095059a4SDave May de->pack_cnt[local] = de->pack_cnt[local] + n; 785*095059a4SDave May 786*095059a4SDave May PetscFunctionReturn(0); 787*095059a4SDave May } 788*095059a4SDave May 789*095059a4SDave May /* 790*095059a4SDave May *) Ensures all data has been packed 791*095059a4SDave May */ 792*095059a4SDave May #undef __FUNCT__ 793*095059a4SDave May #define __FUNCT__ "DataExPackFinalize" 794*095059a4SDave May PetscErrorCode DataExPackFinalize(DataEx de) 795*095059a4SDave May { 796*095059a4SDave May PetscMPIInt i,np; 797*095059a4SDave May PetscInt total; 798*095059a4SDave May PetscErrorCode ierr; 799*095059a4SDave May 800*095059a4SDave May 801*095059a4SDave May PetscFunctionBegin; 802*095059a4SDave May if (de->packer_status != DEOBJECT_INITIALIZED) { 803*095059a4SDave May SETERRQ( de->comm, PETSC_ERR_ORDER, "Packer has not been initialized. Must call DataExPackInitialize() first." ); 804*095059a4SDave May } 805*095059a4SDave May 806*095059a4SDave May np = de->n_neighbour_procs; 807*095059a4SDave May 808*095059a4SDave May for (i=0; i<np; i++) { 809*095059a4SDave May if (de->pack_cnt[i] != de->messages_to_be_sent[i]) { 810*095059a4SDave May SETERRQ3( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not all messages for neighbour[%d] have been packed. Expected %D : Inserted %D", 811*095059a4SDave May (int)de->neighbour_procs[i], de->messages_to_be_sent[i], de->pack_cnt[i] ); 812*095059a4SDave May } 813*095059a4SDave May } 814*095059a4SDave May 815*095059a4SDave May /* init */ 816*095059a4SDave May for (i=0; i<np; i++) { 817*095059a4SDave May de->messages_to_be_recvieved[i] = -1; 818*095059a4SDave May } 819*095059a4SDave May 820*095059a4SDave May /* figure out the recv counts here */ 821*095059a4SDave May for (i=0; i<np; i++) { 822*095059a4SDave May // MPI_Send( &de->messages_to_be_sent[i], 1, MPI_INT, de->neighbour_procs[i], de->send_tags[i], de->comm ); 823*095059a4SDave May ierr = MPI_Isend( &de->messages_to_be_sent[i], 1, MPIU_INT, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i] );CHKERRQ(ierr); 824*095059a4SDave May // MPI_Send( &de->messages_to_be_sent[i], 1, MPI_INT, de->neighbour_procs[i], 0, de->comm ); 825*095059a4SDave May } 826*095059a4SDave May for (i=0; i<np; i++) { 827*095059a4SDave May // MPI_Recv( &de->messages_to_be_recvieved[i], 1, MPI_INT, de->neighbour_procs[i], de->recv_tags[i], de->comm, &stat ); 828*095059a4SDave May ierr = MPI_Irecv( &de->messages_to_be_recvieved[i], 1, MPIU_INT, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np+i] );CHKERRQ(ierr); 829*095059a4SDave May // MPI_Recv( &de->messages_to_be_recvieved[i], 1, MPI_INT, de->neighbour_procs[i], 0, de->comm, &stat ); 830*095059a4SDave May } 831*095059a4SDave May ierr = MPI_Waitall( 2*np, de->_requests, de->_stats );CHKERRQ(ierr); 832*095059a4SDave May 833*095059a4SDave May /* create space for the data to be recvieved */ 834*095059a4SDave May total = 0; 835*095059a4SDave May for (i=0; i<np; i++) { 836*095059a4SDave May total = total + de->messages_to_be_recvieved[i]; 837*095059a4SDave May } 838*095059a4SDave May de->recv_message = (void*)malloc( de->unit_message_size * (total + 1) ); 839*095059a4SDave May /* initialize memory */ 840*095059a4SDave May memset( de->recv_message, 0, de->unit_message_size * (total + 1) ); 841*095059a4SDave May /* set total items to recieve */ 842*095059a4SDave May de->recv_message_length = total; 843*095059a4SDave May 844*095059a4SDave May de->packer_status = DEOBJECT_FINALIZED; 845*095059a4SDave May 846*095059a4SDave May de->communication_status = DEOBJECT_INITIALIZED; 847*095059a4SDave May 848*095059a4SDave May PetscFunctionReturn(0); 849*095059a4SDave May } 850*095059a4SDave May 851*095059a4SDave May /* do the actual message passing now */ 852*095059a4SDave May #undef __FUNCT__ 853*095059a4SDave May #define __FUNCT__ "DataExBegin" 854*095059a4SDave May PetscErrorCode DataExBegin(DataEx de) 855*095059a4SDave May { 856*095059a4SDave May PetscMPIInt i,np; 857*095059a4SDave May void *dest; 858*095059a4SDave May PetscInt length; 859*095059a4SDave May PetscErrorCode ierr; 860*095059a4SDave May 861*095059a4SDave May 862*095059a4SDave May PetscFunctionBegin; 863*095059a4SDave May if (de->topology_status != DEOBJECT_FINALIZED) { 864*095059a4SDave May SETERRQ( de->comm, PETSC_ERR_ORDER, "Topology not finalized" ); 865*095059a4SDave May } 866*095059a4SDave May if (de->message_lengths_status != DEOBJECT_FINALIZED) { 867*095059a4SDave May SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths not finalized" ); 868*095059a4SDave May } 869*095059a4SDave May if (de->packer_status != DEOBJECT_FINALIZED) { 870*095059a4SDave May SETERRQ( de->comm, PETSC_ERR_ORDER, "Packer not finalized" ); 871*095059a4SDave May } 872*095059a4SDave May 873*095059a4SDave May if (de->communication_status == DEOBJECT_FINALIZED) { 874*095059a4SDave May SETERRQ( de->comm, PETSC_ERR_ORDER, "Communication has already been finalized. Must call DataExInitialize() first." ); 875*095059a4SDave May } 876*095059a4SDave May 877*095059a4SDave May if (de->recv_message == NULL) { 878*095059a4SDave May SETERRQ( de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DataExPackFinalize() first" ); 879*095059a4SDave May } 880*095059a4SDave May 881*095059a4SDave May ierr = PetscLogEventBegin(PTATIN_DataExchangerBegin,0,0,0,0);CHKERRQ(ierr); 882*095059a4SDave May np = de->n_neighbour_procs; 883*095059a4SDave May 884*095059a4SDave May /* == NON BLOCKING == */ 885*095059a4SDave May for (i=0; i<np; i++) { 886*095059a4SDave May length = de->messages_to_be_sent[i] * de->unit_message_size; 887*095059a4SDave May dest = ((char*)de->send_message) + de->unit_message_size * de->message_offsets[i]; 888*095059a4SDave May ierr = MPI_Isend( dest, length, MPI_CHAR, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i] );CHKERRQ(ierr); 889*095059a4SDave May } 890*095059a4SDave May 891*095059a4SDave May ierr = PetscLogEventEnd(PTATIN_DataExchangerBegin,0,0,0,0);CHKERRQ(ierr); 892*095059a4SDave May PetscFunctionReturn(0); 893*095059a4SDave May } 894*095059a4SDave May 895*095059a4SDave May /* do the actual message passing now */ 896*095059a4SDave May #undef __FUNCT__ 897*095059a4SDave May #define __FUNCT__ "DataExEnd" 898*095059a4SDave May PetscErrorCode DataExEnd(DataEx de) 899*095059a4SDave May { 900*095059a4SDave May PetscMPIInt i,np; 901*095059a4SDave May PetscInt total; 902*095059a4SDave May PetscInt *message_recv_offsets; 903*095059a4SDave May void *dest; 904*095059a4SDave May PetscInt length; 905*095059a4SDave May PetscErrorCode ierr; 906*095059a4SDave May 907*095059a4SDave May 908*095059a4SDave May PetscFunctionBegin; 909*095059a4SDave May if (de->communication_status != DEOBJECT_INITIALIZED) { 910*095059a4SDave May SETERRQ( de->comm, PETSC_ERR_ORDER, "Communication has not been initialized. Must call DataExInitialize() first." ); 911*095059a4SDave May } 912*095059a4SDave May if (de->recv_message == NULL) { 913*095059a4SDave May SETERRQ( de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DataExPackFinalize() first" ); 914*095059a4SDave May } 915*095059a4SDave May 916*095059a4SDave May ierr = PetscLogEventBegin(PTATIN_DataExchangerEnd,0,0,0,0);CHKERRQ(ierr); 917*095059a4SDave May np = de->n_neighbour_procs; 918*095059a4SDave May 919*095059a4SDave May message_recv_offsets = (PetscInt*)malloc( sizeof(PetscInt) * np ); 920*095059a4SDave May message_recv_offsets[0] = 0; 921*095059a4SDave May total = de->messages_to_be_recvieved[0]; 922*095059a4SDave May for (i=1; i<np; i++) { 923*095059a4SDave May message_recv_offsets[i] = total; 924*095059a4SDave May total = total + de->messages_to_be_recvieved[i]; 925*095059a4SDave May } 926*095059a4SDave May 927*095059a4SDave May /* == NON BLOCKING == */ 928*095059a4SDave May for (i=0; i<np; i++) { 929*095059a4SDave May length = de->messages_to_be_recvieved[i] * de->unit_message_size; 930*095059a4SDave May dest = ((char*)de->recv_message) + de->unit_message_size * message_recv_offsets[i]; 931*095059a4SDave May ierr = MPI_Irecv( dest, length, MPI_CHAR, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np+i] );CHKERRQ(ierr); 932*095059a4SDave May } 933*095059a4SDave May ierr = MPI_Waitall( 2*np, de->_requests, de->_stats );CHKERRQ(ierr); 934*095059a4SDave May 935*095059a4SDave May free(message_recv_offsets); 936*095059a4SDave May 937*095059a4SDave May de->communication_status = DEOBJECT_FINALIZED; 938*095059a4SDave May ierr = PetscLogEventEnd(PTATIN_DataExchangerEnd,0,0,0,0);CHKERRQ(ierr); 939*095059a4SDave May PetscFunctionReturn(0); 940*095059a4SDave May } 941*095059a4SDave May 942*095059a4SDave May #undef __FUNCT__ 943*095059a4SDave May #define __FUNCT__ "DataExGetSendData" 944*095059a4SDave May PetscErrorCode DataExGetSendData(DataEx de,PetscInt *length,void **send) 945*095059a4SDave May { 946*095059a4SDave May PetscFunctionBegin; 947*095059a4SDave May if (de->packer_status != DEOBJECT_FINALIZED) { 948*095059a4SDave May SETERRQ( de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being packed." ); 949*095059a4SDave May } 950*095059a4SDave May *length = de->send_message_length; 951*095059a4SDave May *send = de->send_message; 952*095059a4SDave May PetscFunctionReturn(0); 953*095059a4SDave May } 954*095059a4SDave May 955*095059a4SDave May #undef __FUNCT__ 956*095059a4SDave May #define __FUNCT__ "DataExGetRecvData" 957*095059a4SDave May PetscErrorCode DataExGetRecvData(DataEx de,PetscInt *length,void **recv) 958*095059a4SDave May { 959*095059a4SDave May PetscFunctionBegin; 960*095059a4SDave May if (de->communication_status != DEOBJECT_FINALIZED) { 961*095059a4SDave May SETERRQ( de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being sent." ); 962*095059a4SDave May } 963*095059a4SDave May *length = de->recv_message_length; 964*095059a4SDave May *recv = de->recv_message; 965*095059a4SDave May PetscFunctionReturn(0); 966*095059a4SDave May } 967*095059a4SDave May 968*095059a4SDave May #undef __FUNCT__ 969*095059a4SDave May #define __FUNCT__ "DataExTopologyGetNeighbours" 970*095059a4SDave May PetscErrorCode DataExTopologyGetNeighbours(DataEx de,PetscMPIInt *n,PetscMPIInt *neigh[]) 971*095059a4SDave May { 972*095059a4SDave May PetscFunctionBegin; 973*095059a4SDave May if (n) { *n = de->n_neighbour_procs; } 974*095059a4SDave May if (neigh) { *neigh = de->neighbour_procs; } 975*095059a4SDave May PetscFunctionReturn(0); 976*095059a4SDave May } 977*095059a4SDave May 978