/*
  Build a few basic tools to help with partitioned domains.

  1)
  On each processor, have a DomainExchangerTopology.
  This is a doubly-connected edge list which enumerates the
  communication paths between connected processors. By numbering
  these paths we can always uniquely assign message identifiers.

          edge
           10
    proc ---------> proc
     0   <---------  1
           11
          twin

  E.g. proc 0 sends to proc 1 with message id 10. To receive the correct
  message, proc 1 looks for the edge connected to proc 0; the message id
  then comes from the twin of that edge.

  2)
  A DomainExchangerArrayPacker.
  A little function which, given a piece of data, will memcpy the data into
  the correct place within an array (which will be sent to other procs).

  E.g. on proc 1 we send data to procs 0, 2 and 3. The data has different lengths,
  and all of it gets jammed into a single array, so it must be packed into the correct
  locations. The packer knows how much is going to each processor and keeps track of the
  inserts so as to avoid ever packing TOO much into one slot and inevitably corrupting
  some memory.

  data to 0    data to 2       data to 3

  |--------|-----------------|--|


  The user has to unpack the message themselves. The exchanger can hand back the pointer
  for each entry, but you'll have to cast it to the appropriate data type.




  Phase A: Build topology

  Phase B: Define message lengths

  Phase C: Pack data

  Phase D: Send data

  //
  DataExCreate()
  // A
  DataExTopologyInitialize()
  DataExTopologyAddNeighbour()
  DataExTopologyAddNeighbour()
  DataExTopologyFinalize()
  // B
  DataExInitializeSendCount()
  DataExAddToSendCount()
  DataExAddToSendCount()
  DataExAddToSendCount()
  DataExFinalizeSendCount()
  // C
  DataExPackInitialize()
  DataExPackData()
  DataExPackData()
  DataExPackFinalize()
  // D
  DataExBegin()
   // ... perform any calculations ...
  DataExEnd()

  // Call any getters //

*/

#include <petsc.h>
#include <petscvec.h>
#include <petscmat.h>

#include "data_ex.h"

const char *status_names[] = { "initialized", "finalized", "unknown" };

PetscLogEvent PTATIN_DataExchangerTopologySetup;
PetscLogEvent PTATIN_DataExchangerBegin;
PetscLogEvent PTATIN_DataExchangerEnd;


#undef __FUNCT__
#define __FUNCT__ "DataExCreate"
DataEx DataExCreate(MPI_Comm comm,const PetscInt count)
{
  DataEx         d;
  PetscErrorCode ierr;

  d = (DataEx)malloc( sizeof(struct _p_DataEx) );
  memset( d, 0, sizeof(struct _p_DataEx) );

  ierr = MPI_Comm_dup(comm,&d->comm);
  ierr = MPI_Comm_rank(d->comm,&d->rank);

  d->instance = count;

  d->topology_status        = DEOBJECT_STATE_UNKNOWN;
  d->message_lengths_status = DEOBJECT_STATE_UNKNOWN;
  d->packer_status          = DEOBJECT_STATE_UNKNOWN;
  d->communication_status   = DEOBJECT_STATE_UNKNOWN;

  d->n_neighbour_procs = -1;
  d->neighbour_procs   = NULL;

  d->messages_to_be_sent      = NULL;
  d->message_offsets          = NULL;
  d->messages_to_be_recvieved = NULL;

  d->unit_message_size   = -1;
  d->send_message        = NULL;
  d->send_message_length = -1;
  d->recv_message        = NULL;
  d->recv_message_length = -1;
  d->total_pack_cnt      = -1;
  d->pack_cnt            = NULL;

  d->send_tags = NULL;
  d->recv_tags = NULL;

  d->_stats    = NULL;
  d->_requests = NULL;

  return d;
}

#undef __FUNCT__
#define __FUNCT__ "DataExView"
PetscErrorCode DataExView(DataEx d)
{
  PetscMPIInt p;

  PetscFunctionBegin;
  PetscPrintf( PETSC_COMM_WORLD, "DataEx: instance=%D\n",d->instance);

  PetscPrintf( PETSC_COMM_WORLD, "  topology status:        %s \n", status_names[d->topology_status] );
  PetscPrintf( PETSC_COMM_WORLD, "  message lengths status: %s \n", status_names[d->message_lengths_status] );
  PetscPrintf( PETSC_COMM_WORLD, "  packer status:          %s \n", status_names[d->packer_status] );
  PetscPrintf( PETSC_COMM_WORLD, "  communication status:   %s \n", status_names[d->communication_status] );

  if (d->topology_status == DEOBJECT_FINALIZED) {
    PetscPrintf( PETSC_COMM_WORLD, "  Topology:\n");
    PetscPrintf( PETSC_COMM_SELF, "    [%d] neighbours: %d \n", (int)d->rank, (int)d->n_neighbour_procs );
    for (p=0; p<d->n_neighbour_procs; p++) {
      PetscPrintf( PETSC_COMM_SELF, "    [%d]   neighbour[%d] = %d \n", (int)d->rank, (int)p, (int)d->neighbour_procs[p] );
    }
  }

  if (d->message_lengths_status == DEOBJECT_FINALIZED) {
    PetscPrintf( PETSC_COMM_WORLD, "  Message lengths:\n");
    PetscPrintf( PETSC_COMM_SELF, "    [%d] atomic size: %ld \n", (int)d->rank, (long int)d->unit_message_size );
    for (p=0; p<d->n_neighbour_procs; p++) {
      PetscPrintf( PETSC_COMM_SELF, "    [%d] >>>>> ( %D units :: tag = %d ) >>>>> [%d] \n", (int)d->rank, d->messages_to_be_sent[p], d->send_tags[p], (int)d->neighbour_procs[p] );
    }
    for (p=0; p<d->n_neighbour_procs; p++) {
      PetscPrintf( PETSC_COMM_SELF, "    [%d] <<<<< ( %D units :: tag = %d ) <<<<< [%d] \n", (int)d->rank, d->messages_to_be_recvieved[p], d->recv_tags[p], (int)d->neighbour_procs[p] );
    }
  }

  if (d->packer_status == DEOBJECT_FINALIZED) {

  }

  if (d->communication_status == DEOBJECT_FINALIZED) {

  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "DataExDestroy"
PetscErrorCode DataExDestroy(DataEx d)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_free(&d->comm);CHKERRQ(ierr);

  if (d->neighbour_procs != NULL) {
    free(d->neighbour_procs);
  }

  if (d->messages_to_be_sent != NULL) {
    free(d->messages_to_be_sent);
  }

  if (d->message_offsets != NULL) {
    free(d->message_offsets);
  }

  if (d->messages_to_be_recvieved != NULL) {
    free(d->messages_to_be_recvieved);
  }

  if (d->send_message != NULL) {
    free(d->send_message);
  }

  if (d->recv_message != NULL) {
    free(d->recv_message);
  }

  if (d->pack_cnt != NULL) {
    free(d->pack_cnt);
  }

  if (d->send_tags != NULL) {
    free(d->send_tags);
  }
  if (d->recv_tags != NULL) {
    free(d->recv_tags);
  }

  if (d->_stats != NULL) {
    free(d->_stats);
  }
  if (d->_requests != NULL) {
    free(d->_requests);
  }

  free(d);

  PetscFunctionReturn(0);
}

/* === Phase A === */

#undef __FUNCT__
#define __FUNCT__ "DataExTopologyInitialize"
PetscErrorCode DataExTopologyInitialize(DataEx d)
{
  PetscFunctionBegin;
  d->topology_status = DEOBJECT_INITIALIZED;

  d->n_neighbour_procs = 0;
  if (d->neighbour_procs != NULL)          { free(d->neighbour_procs);          d->neighbour_procs = NULL; }
  if (d->messages_to_be_sent != NULL)      { free(d->messages_to_be_sent);      d->messages_to_be_sent = NULL; }
  if (d->message_offsets != NULL)          { free(d->message_offsets);          d->message_offsets = NULL; }
  if (d->messages_to_be_recvieved != NULL) { free(d->messages_to_be_recvieved); d->messages_to_be_recvieved = NULL; }
  if (d->pack_cnt != NULL)                 { free(d->pack_cnt);                 d->pack_cnt = NULL; }

  if (d->send_tags != NULL) { free(d->send_tags); d->send_tags = NULL; }
  if (d->recv_tags != NULL) { free(d->recv_tags); d->recv_tags = NULL; }

  PetscFunctionReturn(0);
}
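
/*
  Illustrative sketch (not part of the original API): a typical Phase A sequence for a
  periodic 1D decomposition, in which each rank registers the ranks to its left and right
  as neighbours. The function name is hypothetical; it only calls the Phase A routines
  defined in this file, which are assumed to be declared in data_ex.h.
*/
#undef __FUNCT__
#define __FUNCT__ "DataExExample_TopologyRing"
PetscErrorCode DataExExample_TopologyRing(DataEx de)
{
  PetscMPIInt    rank,size,left,right;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(de->comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(de->comm,&rank);CHKERRQ(ierr);
  left  = (rank - 1 + size) % size;  /* periodic wrap */
  right = (rank + 1) % size;

  ierr = DataExTopologyInitialize(de);CHKERRQ(ierr);
  if (left  != rank) { ierr = DataExTopologyAddNeighbour(de,left);CHKERRQ(ierr); }
  if (right != rank) { ierr = DataExTopologyAddNeighbour(de,right);CHKERRQ(ierr); }
  ierr = DataExTopologyFinalize(de);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}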

#undef __FUNCT__
#define __FUNCT__ "DataExTopologyAddNeighbour"
PetscErrorCode DataExTopologyAddNeighbour(DataEx d,const PetscMPIInt proc_id)
{
  PetscMPIInt    n,found;
  PetscMPIInt    nproc;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (d->topology_status == DEOBJECT_FINALIZED) {
    SETERRQ( d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology has been finalized. To modify or update, call DataExTopologyInitialize() first" );
  }
  else if (d->topology_status != DEOBJECT_INITIALIZED) {
    SETERRQ( d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be initialised. Call DataExTopologyInitialize() first" );
  }

  /* error on negative entries */
  if (proc_id < 0) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Trying to set proc neighbour with a rank < 0");
  }
  /* error on ranks larger than number of procs in communicator */
  ierr = MPI_Comm_size(d->comm,&nproc);CHKERRQ(ierr);
  if (proc_id >= nproc) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Trying to set proc neighbour with a rank >= nproc");
  }

  if (d->n_neighbour_procs == 0) {
    d->neighbour_procs = (PetscMPIInt*)malloc( sizeof(PetscMPIInt) );
  }

  /* check whether proc_id is already in the list */
  found = 0;
  for (n=0; n<d->n_neighbour_procs; n++) {
    if (d->neighbour_procs[n] == proc_id) {
      found = 1;
    }
  }
  if (found == 0) { /* add it to the list */
    PetscMPIInt *tmp;

    tmp = (PetscMPIInt*)realloc( d->neighbour_procs, sizeof(PetscMPIInt)*(d->n_neighbour_procs+1) );
    d->neighbour_procs = tmp;

    d->neighbour_procs[ d->n_neighbour_procs ] = proc_id;
    d->n_neighbour_procs++;
  }

  PetscFunctionReturn(0);
}

/*
  counter: the index of the communication object
  N:  the number of processors
  r0: rank of sender
  r1: rank of receiver

  procs = { 0, 1, 2, 3 }

  0 ==> 0   e=0
  0 ==> 1   e=1
  0 ==> 2   e=2
  0 ==> 3   e=3

  1 ==> 0   e=4
  1 ==> 1   e=5
  1 ==> 2   e=6
  1 ==> 3   e=7

  2 ==> 0   e=8
  2 ==> 1   e=9
  2 ==> 2   e=10
  2 ==> 3   e=11

  3 ==> 0   e=12
  3 ==> 1   e=13
  3 ==> 2   e=14
  3 ==> 3   e=15

  If we require that proc A sends to proc B, then the SEND tag index will be given by
    N * rank(A) + rank(B) + offset
  If we require that proc A will receive from proc B, then the RECV tag index will be given by
    N * rank(B) + rank(A) + offset

  E.g. with N = 4 and counter = 0: rank 0 sending to rank 1 uses send tag 4*0+1 = 1, and
  rank 1 computes the matching recv tag for neighbour 0 as 4*0+1 = 1; the reverse path
  1 ==> 0 uses tag 4*1+0 = 4.
*/
void _get_tags( PetscInt counter, PetscMPIInt N, PetscMPIInt r0,PetscMPIInt r1, PetscMPIInt *_st, PetscMPIInt *_rt )
{
  PetscMPIInt st,rt;

  st = N*r0 + r1 + N*N*counter;
  rt = N*r1 + r0 + N*N*counter;

  *_st = st;
  *_rt = rt;
}

/*
  Makes the communication map symmetric
*/
#undef __FUNCT__
#define __FUNCT__ "_DataExCompleteCommunicationMap"
PetscErrorCode _DataExCompleteCommunicationMap(MPI_Comm comm,PetscMPIInt n,PetscMPIInt proc_neighbours[],PetscMPIInt *n_new,PetscMPIInt **proc_neighbours_new)
{
  Mat               A,redA;
  PetscInt          i,j,nc;
  PetscInt          n_, *proc_neighbours_;
  PetscInt          rank_i_;
  PetscMPIInt       size, rank_i;
  PetscInt          max_nnz;
  PetscScalar       *vals;
  const PetscInt    *cols;
  const PetscScalar *red_vals;
  PetscMPIInt       _n_new, *_proc_neighbours_new;
  PetscBool         is_seqaij;
  PetscLogDouble    t0,t1;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscPrintf(PETSC_COMM_WORLD,"************************** Starting _DataExCompleteCommunicationMap ************************** \n");
  PetscTime(&t0);

  n_ = n;
  ierr = PetscMalloc( sizeof(PetscInt) * n_, &proc_neighbours_ );CHKERRQ(ierr);
  for (i=0; i<n_; i++) {
    proc_neighbours_[i] = proc_neighbours[i];
  }

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank_i);CHKERRQ(ierr);
  rank_i_ = rank_i;

  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,size,size);CHKERRQ(ierr);
  ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);

  ierr = MPI_Allreduce(&n_,&max_nnz,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(A,n_,NULL);CHKERRQ(ierr);
  PetscPrintf(PETSC_COMM_WORLD,"max_nnz = %D \n", max_nnz );
  //printf("[%d]: nnz = %d \n", rank_i,n_ );
  {
    ierr = MatMPIAIJSetPreallocation(A,1,NULL,n_,NULL);CHKERRQ(ierr);
  }


  /* Build original map */
  ierr = PetscMalloc( sizeof(PetscScalar)*n_, &vals );CHKERRQ(ierr);
  for (i=0; i<n_; i++) {
    vals[i] = 1.0;
  }
  ierr = MatSetValues( A, 1,&rank_i_, n_,proc_neighbours_, vals, INSERT_VALUES );CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);

  /* Now force all other connections if they are not already there */
  /* It's more efficient to do them all at once */
  for (i=0; i<n_; i++) {
    vals[i] = 2.0;
  }
  ierr = MatSetValues( A, n_,proc_neighbours_, 1,&rank_i_, vals, INSERT_VALUES );CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
  ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

  /*
    Is this step necessary at all?
    Why can't I just use MatGetRow() on the single row living on the current rank?
    Seems like a fine thing to do, provided the operation MatGetRow() is supported for MATMPIAIJ.
    ~ DAM, Feb 26, 2013 (using petsc 3.2)
  */
  /* Duplicate the entire matrix on ALL cpus */
  /*
    MatGetRedundantMatrix is not supported for SEQAIJ, thus we
    fake a redundant matrix by setting redA equal to A. This enables the
    code to run on one cpu (even if this seems slightly odd).
  */
  is_seqaij = PETSC_FALSE;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&is_seqaij);CHKERRQ(ierr);
  //  if (is_seqaij==PETSC_FALSE) {
  //    ierr = MatGetRedundantMatrix( A, size_, PETSC_COMM_SELF, size_, MAT_INITIAL_MATRIX, &redA );CHKERRQ(ierr);
  //  } else {
  redA = A;
  ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
  //  }

  if ((n_new != NULL) && (proc_neighbours_new != NULL)) {

    ierr = MatGetRow( redA, rank_i_, &nc, &cols, &red_vals );CHKERRQ(ierr);

    _n_new = (PetscMPIInt)nc;
    _proc_neighbours_new = (PetscMPIInt*)malloc( sizeof(PetscMPIInt) * _n_new );

    for (j=0; j<nc; j++) {
      _proc_neighbours_new[j] = (PetscMPIInt)cols[j];
    }
    ierr = MatRestoreRow( redA, rank_i_, &nc, &cols, &red_vals );CHKERRQ(ierr);

    *n_new               = (PetscMPIInt)_n_new;
    *proc_neighbours_new = (PetscMPIInt*)_proc_neighbours_new;
  }

  ierr = MatDestroy(&redA);CHKERRQ(ierr);
  ierr = MatDestroy(&A);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(proc_neighbours_);CHKERRQ(ierr);

  ierr = MPI_Barrier(comm);CHKERRQ(ierr);
  PetscTime(&t1);
  PetscPrintf(PETSC_COMM_WORLD,"************************** Ending _DataExCompleteCommunicationMap [setup time: %1.4e (sec)] ************************** \n",t1-t0);

  PetscFunctionReturn(0);
}
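
/*
  A minimal sketch (not called anywhere in this file) of the same symmetrization performed
  without a Mat, addressing the question raised in the comment above: each rank scatters a
  0/1 flag to every other rank with MPI_Alltoall, so recvflags[j] != 0 exactly when rank j
  listed this rank as a neighbour. The union of "ranks I listed" and "ranks that listed me"
  is the symmetric neighbour set. The function name is hypothetical and the routine is
  provided purely for illustration.
*/
#undef __FUNCT__
#define __FUNCT__ "_DataExCompleteCommunicationMap_AlltoallSketch"
PetscErrorCode _DataExCompleteCommunicationMap_AlltoallSketch(MPI_Comm comm,PetscMPIInt n,PetscMPIInt proc_neighbours[],PetscMPIInt *n_new,PetscMPIInt **proc_neighbours_new)
{
  PetscMPIInt    size,i,cnt;
  int            *sendflags,*recvflags;
  PetscMPIInt    *symm;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  sendflags = (int*)calloc( (size_t)size, sizeof(int) );
  recvflags = (int*)calloc( (size_t)size, sizeof(int) );
  for (i=0; i<n; i++) {
    sendflags[ proc_neighbours[i] ] = 1;  /* ranks this rank wants to talk to */
  }
  /* after the all-to-all, recvflags[j] != 0 iff rank j named this rank */
  ierr = MPI_Alltoall( sendflags,1,MPI_INT, recvflags,1,MPI_INT, comm );CHKERRQ(ierr);

  cnt = 0;
  for (i=0; i<size; i++) {
    if (sendflags[i] || recvflags[i]) { cnt++; }
  }
  symm = (PetscMPIInt*)malloc( sizeof(PetscMPIInt) * (cnt + 1) );
  cnt = 0;
  for (i=0; i<size; i++) {
    if (sendflags[i] || recvflags[i]) { symm[cnt++] = i; }
  }

  if (n_new)               { *n_new = cnt; }
  if (proc_neighbours_new) { *proc_neighbours_new = symm; } else { free(symm); }

  free(sendflags);
  free(recvflags);
  PetscFunctionReturn(0);
}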

#undef __FUNCT__
#define __FUNCT__ "DataExTopologyFinalize"
PetscErrorCode DataExTopologyFinalize(DataEx d)
{
  PetscMPIInt    symm_nn;
  PetscMPIInt    *symm_procs;
  PetscMPIInt    r0,n,st,rt;
  PetscMPIInt    nprocs;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (d->topology_status != DEOBJECT_INITIALIZED) {
    SETERRQ( d->comm, PETSC_ERR_ARG_WRONGSTATE, "Topology must be initialised. Call DataExTopologyInitialize() first" );
  }
  ierr = PetscLogEventBegin(PTATIN_DataExchangerTopologySetup,0,0,0,0);CHKERRQ(ierr);

  /* given information about all my neighbours, make the map symmetric */
  ierr = _DataExCompleteCommunicationMap( d->comm,d->n_neighbour_procs,d->neighbour_procs, &symm_nn, &symm_procs );CHKERRQ(ierr);
  /* update my arrays */
  free(d->neighbour_procs);

  d->n_neighbour_procs = symm_nn;
  d->neighbour_procs   = symm_procs;


  /* allocate memory */
  if (d->messages_to_be_sent == NULL) {
    d->messages_to_be_sent = (PetscInt*)malloc( sizeof(PetscInt) * d->n_neighbour_procs );
  }
  if (d->message_offsets == NULL) {
    d->message_offsets = (PetscInt*)malloc( sizeof(PetscInt) * d->n_neighbour_procs );
  }
  if (d->messages_to_be_recvieved == NULL) {
    d->messages_to_be_recvieved = (PetscInt*)malloc( sizeof(PetscInt) * d->n_neighbour_procs );
  }

  if (d->pack_cnt == NULL) {
    d->pack_cnt = (PetscInt*)malloc( sizeof(PetscInt) * d->n_neighbour_procs );
  }

  if (d->_stats == NULL) {
    d->_stats = (MPI_Status*)malloc( sizeof(MPI_Status) * 2*d->n_neighbour_procs );
  }
  if (d->_requests == NULL) {
    d->_requests = (MPI_Request*)malloc( sizeof(MPI_Request) * 2*d->n_neighbour_procs );
  }

  if (d->send_tags == NULL) {
    d->send_tags = (int*)malloc( sizeof(int) * d->n_neighbour_procs );
  }
  if (d->recv_tags == NULL) {
    d->recv_tags = (int*)malloc( sizeof(int) * d->n_neighbour_procs );
  }

  /* compute message tags */
  ierr = MPI_Comm_size(d->comm,&nprocs);CHKERRQ(ierr);
  r0 = d->rank;
  for (n=0; n<d->n_neighbour_procs; n++) {
    PetscMPIInt r1 = d->neighbour_procs[n];

    _get_tags( d->instance, nprocs, r0,r1, &st, &rt );

    d->send_tags[n] = (int)st;
    d->recv_tags[n] = (int)rt;
  }

  d->topology_status = DEOBJECT_FINALIZED;
  ierr = PetscLogEventEnd(PTATIN_DataExchangerTopologySetup,0,0,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/* === Phase B === */
#undef __FUNCT__
#define __FUNCT__ "_DataExConvertProcIdToLocalIndex"
PetscErrorCode _DataExConvertProcIdToLocalIndex(DataEx de,PetscMPIInt proc_id,PetscMPIInt *local)
{
  PetscMPIInt i,np;

  PetscFunctionBegin;
  np = de->n_neighbour_procs;

  *local = -1;
  for (i=0; i<np; i++) {
    if (proc_id == de->neighbour_procs[i]) {
      *local = i;
      break;
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "DataExInitializeSendCount"
PetscErrorCode DataExInitializeSendCount(DataEx de)
{
  PetscMPIInt i;

  PetscFunctionBegin;
  if (de->topology_status != DEOBJECT_FINALIZED) {
    SETERRQ( de->comm, PETSC_ERR_ORDER, "Topology not finalized" );
  }

  de->message_lengths_status = DEOBJECT_INITIALIZED;

  for (i=0; i<de->n_neighbour_procs; i++) {
    de->messages_to_be_sent[i] = 0;
  }

  PetscFunctionReturn(0);
}
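
/*
  Illustrative sketch (not part of the original API): a typical Phase B counting loop.
  'npoints' and 'point_owner_rank[]' are hypothetical user data; point_owner_rank[p] is
  assumed to hold the rank that point p must be shipped to (the local rank if it stays
  put), and every remote owner is assumed to be a registered neighbour.
*/
#undef __FUNCT__
#define __FUNCT__ "DataExExample_CountPointsToSend"
PetscErrorCode DataExExample_CountPointsToSend(DataEx de,PetscInt npoints,const PetscMPIInt point_owner_rank[])
{
  PetscInt       p;
  PetscMPIInt    rank;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(de->comm,&rank);CHKERRQ(ierr);
  ierr = DataExInitializeSendCount(de);CHKERRQ(ierr);
  for (p=0; p<npoints; p++) {
    if (point_owner_rank[p] != rank) { /* the point leaves this rank */
      ierr = DataExAddToSendCount(de,point_owner_rank[p],1);CHKERRQ(ierr);
    }
  }
  ierr = DataExFinalizeSendCount(de);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}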

/*
  1) only allows counters to be set on neighbouring cpus
*/
#undef __FUNCT__
#define __FUNCT__ "DataExAddToSendCount"
PetscErrorCode DataExAddToSendCount(DataEx de,const PetscMPIInt proc_id,const PetscInt count)
{
  PetscMPIInt    local_val;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->message_lengths_status == DEOBJECT_FINALIZED) {
    SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths have been defined. To modify these, call DataExInitializeSendCount() first" );
  }
  else if (de->message_lengths_status != DEOBJECT_INITIALIZED) {
    SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DataExInitializeSendCount() first" );
  }

  ierr = _DataExConvertProcIdToLocalIndex( de, proc_id, &local_val );CHKERRQ(ierr);
  if (local_val == -1) {
    SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Proc %d is not a valid neighbour rank", (int)proc_id );
  }

  de->messages_to_be_sent[local_val] = de->messages_to_be_sent[local_val] + count;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "DataExFinalizeSendCount"
PetscErrorCode DataExFinalizeSendCount(DataEx de)
{
  PetscFunctionBegin;
  if (de->message_lengths_status != DEOBJECT_INITIALIZED) {
    SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths must be defined. Call DataExInitializeSendCount() first" );
  }
  de->message_lengths_status = DEOBJECT_FINALIZED;

  PetscFunctionReturn(0);
}

/* === Phase C === */
/*
  * zero out all send counts
  * free send and recv buffers
  * zero out message length
  * zero out all counters
  * zero out packed data counters
*/
#undef __FUNCT__
#define __FUNCT__ "_DataExInitializeTmpStorage"
PetscErrorCode _DataExInitializeTmpStorage(DataEx de)
{
  PetscMPIInt i,np;

  PetscFunctionBegin;
  if (de->n_neighbour_procs < 0) {
    SETERRQ( PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of neighbour procs < 0" );
  }
  if (de->neighbour_procs == NULL) {
    SETERRQ( PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "Neighbour proc list is NULL" );
  }

  np = de->n_neighbour_procs;
  for (i=0; i<np; i++) {
    /*  de->messages_to_be_sent[i] = -1; */
    de->messages_to_be_recvieved[i] = -1;
  }

  if (de->send_message != NULL) {
    free(de->send_message);
    de->send_message = NULL;
  }
  if (de->recv_message != NULL) {
    free(de->recv_message);
    de->recv_message = NULL;
  }

  PetscFunctionReturn(0);
}

/*
  *) Zero out pack data counters
  *) Ensure message length is set
  *) Check send counts are properly initialized
  *) Allocate space for pack data
*/
#undef __FUNCT__
#define __FUNCT__ "DataExPackInitialize"
PetscErrorCode DataExPackInitialize(DataEx de,size_t unit_message_size)
{
  PetscMPIInt    i,np;
  PetscInt       total;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->topology_status != DEOBJECT_FINALIZED) {
    SETERRQ( de->comm, PETSC_ERR_ORDER, "Topology not finalized" );
  }
  if (de->message_lengths_status != DEOBJECT_FINALIZED) {
    SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths not finalized" );
  }

  de->packer_status = DEOBJECT_INITIALIZED;

  ierr = _DataExInitializeTmpStorage(de);CHKERRQ(ierr);

  np = de->n_neighbour_procs;

  de->unit_message_size = unit_message_size;

  total = 0;
  for (i=0; i<np; i++) {
    if (de->messages_to_be_sent[i] == -1) {
      PetscMPIInt proc_neighbour = de->neighbour_procs[i];
      SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ORDER, "messages_to_be_sent[neighbour_proc=%d] is un-initialised. Call DataExInitializeSendCount() first", (int)proc_neighbour );
    }
    total = total + de->messages_to_be_sent[i];
  }

  /* create space for the data to be sent */
  de->send_message = (void*)malloc( unit_message_size * (total + 1) );
  /* initialize memory */
  memset( de->send_message, 0, unit_message_size * (total + 1) );
  /* set total items to send */
  de->send_message_length = total;

  de->message_offsets[0] = 0;
  total = de->messages_to_be_sent[0];
  for (i=1; i<np; i++) {
    de->message_offsets[i] = total;
    total = total + de->messages_to_be_sent[i];
  }

  /* init the packer counters */
  de->total_pack_cnt = 0;
  for (i=0; i<np; i++) {
    de->pack_cnt[i] = 0;
  }

  PetscFunctionReturn(0);
}

/*
  *) Ensures data gets packed appropriately and no overlaps occur
*/
#undef __FUNCT__
#define __FUNCT__ "DataExPackData"
PetscErrorCode DataExPackData(DataEx de,PetscMPIInt proc_id,PetscInt n,void *data)
{
  PetscMPIInt    local;
  PetscInt       insert_location;
  void           *dest;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->packer_status == DEOBJECT_FINALIZED) {
    SETERRQ( de->comm, PETSC_ERR_ORDER, "Packed data have been defined. To modify these, call DataExInitializeSendCount(), DataExAddToSendCount(), DataExPackInitialize() first" );
  }
  else if (de->packer_status != DEOBJECT_INITIALIZED) {
    SETERRQ( de->comm, PETSC_ERR_ORDER, "Packed data must be defined. Call DataExInitializeSendCount(), DataExAddToSendCount(), DataExPackInitialize() first" );
  }

  if (de->send_message == NULL) {
    SETERRQ( de->comm, PETSC_ERR_ORDER, "send_message is not initialized. Call DataExPackInitialize() first" );
  }

  ierr = _DataExConvertProcIdToLocalIndex( de, proc_id, &local );CHKERRQ(ierr);
  if (local == -1) {
    SETERRQ1( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "proc_id %d is not a registered neighbour", (int)proc_id );
  }

  if (n+de->pack_cnt[local] > de->messages_to_be_sent[local]) {
    SETERRQ3( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Trying to pack too many entries to be sent to proc %d. Space requested = %D: Attempt to insert %D",
              (int)proc_id, de->messages_to_be_sent[local], n+de->pack_cnt[local] );

    /* don't need this - the catch for too many messages will pick this up. Gives us more info though */
    if (de->packer_status == DEOBJECT_FINALIZED) {
      SETERRQ( de->comm, PETSC_ERR_ARG_WRONG, "Cannot insert any more data. DataExPackFinalize() has been called." );
    }
  }

  /* copy memory */
  insert_location = de->message_offsets[local] + de->pack_cnt[local];
  dest = ((char*)de->send_message) + de->unit_message_size*insert_location;
  memcpy( dest, data, de->unit_message_size * n );

  /* increment counter */
  de->pack_cnt[local] = de->pack_cnt[local] + n;

  PetscFunctionReturn(0);
}
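
/*
  Illustrative sketch (not part of the original API): a typical Phase C packing loop that
  matches the counting sketch above. 'ExampleParticle' is a hypothetical unit type; the
  size handed to DataExPackInitialize() must be the same unit size used for every
  DataExPackData() call.
*/
typedef struct {
  PetscReal coor[3];
  PetscInt  global_id;
} ExampleParticle;

#undef __FUNCT__
#define __FUNCT__ "DataExExample_PackPoints"
PetscErrorCode DataExExample_PackPoints(DataEx de,PetscInt npoints,const PetscMPIInt point_owner_rank[],ExampleParticle points[])
{
  PetscInt       p;
  PetscMPIInt    rank;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(de->comm,&rank);CHKERRQ(ierr);
  ierr = DataExPackInitialize(de,sizeof(ExampleParticle));CHKERRQ(ierr);
  for (p=0; p<npoints; p++) {
    if (point_owner_rank[p] != rank) {
      /* pack one unit destined for the owning neighbour; overflow is caught by DataExPackData() */
      ierr = DataExPackData(de,point_owner_rank[p],1,(void*)&points[p]);CHKERRQ(ierr);
    }
  }
  ierr = DataExPackFinalize(de);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}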

/*
  *) Ensures all data has been packed
*/
#undef __FUNCT__
#define __FUNCT__ "DataExPackFinalize"
PetscErrorCode DataExPackFinalize(DataEx de)
{
  PetscMPIInt    i,np;
  PetscInt       total;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->packer_status != DEOBJECT_INITIALIZED) {
    SETERRQ( de->comm, PETSC_ERR_ORDER, "Packer has not been initialized. Must call DataExPackInitialize() first." );
  }

  np = de->n_neighbour_procs;

  for (i=0; i<np; i++) {
    if (de->pack_cnt[i] != de->messages_to_be_sent[i]) {
      SETERRQ3( PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not all messages for neighbour[%d] have been packed. Expected %D : Inserted %D",
                (int)de->neighbour_procs[i], de->messages_to_be_sent[i], de->pack_cnt[i] );
    }
  }

  /* init */
  for (i=0; i<np; i++) {
    de->messages_to_be_recvieved[i] = -1;
  }

  /* figure out the recv counts here */
  for (i=0; i<np; i++) {
    //    MPI_Send( &de->messages_to_be_sent[i], 1, MPI_INT, de->neighbour_procs[i], de->send_tags[i], de->comm );
    ierr = MPI_Isend( &de->messages_to_be_sent[i], 1, MPIU_INT, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i] );CHKERRQ(ierr);
    //    MPI_Send( &de->messages_to_be_sent[i], 1, MPI_INT, de->neighbour_procs[i], 0, de->comm );
  }
  for (i=0; i<np; i++) {
    //    MPI_Recv( &de->messages_to_be_recvieved[i], 1, MPI_INT, de->neighbour_procs[i], de->recv_tags[i], de->comm, &stat );
    ierr = MPI_Irecv( &de->messages_to_be_recvieved[i], 1, MPIU_INT, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np+i] );CHKERRQ(ierr);
    //    MPI_Recv( &de->messages_to_be_recvieved[i], 1, MPI_INT, de->neighbour_procs[i], 0, de->comm, &stat );
  }
  ierr = MPI_Waitall( 2*np, de->_requests, de->_stats );CHKERRQ(ierr);

  /* create space for the data to be received */
  total = 0;
  for (i=0; i<np; i++) {
    total = total + de->messages_to_be_recvieved[i];
  }
  de->recv_message = (void*)malloc( de->unit_message_size * (total + 1) );
  /* initialize memory */
  memset( de->recv_message, 0, de->unit_message_size * (total + 1) );
  /* set total items to receive */
  de->recv_message_length = total;

  de->packer_status = DEOBJECT_FINALIZED;

  de->communication_status = DEOBJECT_INITIALIZED;

  PetscFunctionReturn(0);
}

/* === Phase D === */
/* do the actual message passing now */
#undef __FUNCT__
#define __FUNCT__ "DataExBegin"
PetscErrorCode DataExBegin(DataEx de)
{
  PetscMPIInt    i,np;
  void           *dest;
  PetscInt       length;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->topology_status != DEOBJECT_FINALIZED) {
    SETERRQ( de->comm, PETSC_ERR_ORDER, "Topology not finalized" );
  }
  if (de->message_lengths_status != DEOBJECT_FINALIZED) {
    SETERRQ( de->comm, PETSC_ERR_ORDER, "Message lengths not finalized" );
  }
  if (de->packer_status != DEOBJECT_FINALIZED) {
    SETERRQ( de->comm, PETSC_ERR_ORDER, "Packer not finalized" );
  }

  if (de->communication_status == DEOBJECT_FINALIZED) {
    SETERRQ( de->comm, PETSC_ERR_ORDER, "Communication has already been finalized. Must call DataExInitialize() first." );
  }

  if (de->recv_message == NULL) {
    SETERRQ( de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DataExPackFinalize() first" );
  }

  ierr = PetscLogEventBegin(PTATIN_DataExchangerBegin,0,0,0,0);CHKERRQ(ierr);
  np = de->n_neighbour_procs;

  /* == NON BLOCKING == */
  for (i=0; i<np; i++) {
    length = de->messages_to_be_sent[i] * de->unit_message_size;
    dest = ((char*)de->send_message) + de->unit_message_size * de->message_offsets[i];
    ierr = MPI_Isend( dest, length, MPI_CHAR, de->neighbour_procs[i], de->send_tags[i], de->comm, &de->_requests[i] );CHKERRQ(ierr);
  }

  ierr = PetscLogEventEnd(PTATIN_DataExchangerBegin,0,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* do the actual message passing now */
#undef __FUNCT__
#define __FUNCT__ "DataExEnd"
PetscErrorCode DataExEnd(DataEx de)
{
  PetscMPIInt    i,np;
  PetscInt       total;
  PetscInt       *message_recv_offsets;
  void           *dest;
  PetscInt       length;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (de->communication_status != DEOBJECT_INITIALIZED) {
    SETERRQ( de->comm, PETSC_ERR_ORDER, "Communication has not been initialized. Must call DataExInitialize() first." );
  }
  if (de->recv_message == NULL) {
    SETERRQ( de->comm, PETSC_ERR_ORDER, "recv_message has not been initialized. Must call DataExPackFinalize() first" );
  }

  ierr = PetscLogEventBegin(PTATIN_DataExchangerEnd,0,0,0,0);CHKERRQ(ierr);
  np = de->n_neighbour_procs;

  message_recv_offsets = (PetscInt*)malloc( sizeof(PetscInt) * np );
  message_recv_offsets[0] = 0;
  total = de->messages_to_be_recvieved[0];
  for (i=1; i<np; i++) {
    message_recv_offsets[i] = total;
    total = total + de->messages_to_be_recvieved[i];
  }

  /* == NON BLOCKING == */
  for (i=0; i<np; i++) {
    length = de->messages_to_be_recvieved[i] * de->unit_message_size;
    dest = ((char*)de->recv_message) + de->unit_message_size * message_recv_offsets[i];
    ierr = MPI_Irecv( dest, length, MPI_CHAR, de->neighbour_procs[i], de->recv_tags[i], de->comm, &de->_requests[np+i] );CHKERRQ(ierr);
  }
  ierr = MPI_Waitall( 2*np, de->_requests, de->_stats );CHKERRQ(ierr);

  free(message_recv_offsets);

  de->communication_status = DEOBJECT_FINALIZED;
  ierr = PetscLogEventEnd(PTATIN_DataExchangerEnd,0,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "DataExGetSendData"
PetscErrorCode DataExGetSendData(DataEx de,PetscInt *length,void **send)
{
  PetscFunctionBegin;
  if (de->packer_status != DEOBJECT_FINALIZED) {
    SETERRQ( de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being packed." );
  }
  *length = de->send_message_length;
  *send   = de->send_message;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "DataExGetRecvData"
PetscErrorCode DataExGetRecvData(DataEx de,PetscInt *length,void **recv)
{
  PetscFunctionBegin;
  if (de->communication_status != DEOBJECT_FINALIZED) {
    SETERRQ( de->comm, PETSC_ERR_ARG_WRONGSTATE, "Data has not finished being sent." );
  }
  *length = de->recv_message_length;
  *recv   = de->recv_message;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "DataExTopologyGetNeighbours"
PetscErrorCode DataExTopologyGetNeighbours(DataEx de,PetscMPIInt *n,PetscMPIInt *neigh[])
{
  PetscFunctionBegin;
  if (n)     { *n     = de->n_neighbour_procs; }
  if (neigh) { *neigh = de->neighbour_procs; }
  PetscFunctionReturn(0);
}
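
/*
  Illustrative sketch (not part of the original API): Phase D plus unpacking. As the header
  comment notes, the exchanger only hands back a void pointer to the received buffer; the
  caller casts it to the unit type that was packed. 'ExampleParticle' is the hypothetical
  struct introduced in the packing sketch above, and any independent local work can be
  overlapped between DataExBegin() and DataExEnd().
*/
#undef __FUNCT__
#define __FUNCT__ "DataExExample_ExchangeAndUnpack"
PetscErrorCode DataExExample_ExchangeAndUnpack(DataEx de)
{
  PetscInt        n_recv,p;
  void            *recv_data;
  ExampleParticle *recv_points;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  ierr = DataExBegin(de);CHKERRQ(ierr);
  /* ... perform any independent local calculations here ... */
  ierr = DataExEnd(de);CHKERRQ(ierr);

  /* retrieve the received buffer and cast it to the packed unit type */
  ierr = DataExGetRecvData(de,&n_recv,&recv_data);CHKERRQ(ierr);
  recv_points = (ExampleParticle*)recv_data;
  for (p=0; p<n_recv; p++) {
    /* consume recv_points[p], e.g. insert the point into the local domain */
    PetscPrintf(PETSC_COMM_SELF,"received point with global_id %D\n",recv_points[p].global_id);
  }
  PetscFunctionReturn(0);
}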