1 2 #include <petsc-private/matimpl.h> 3 4 #define DEFAULT_STASH_SIZE 10000 5 6 static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*); 7 static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); 8 static PetscErrorCode MatStashScatterEnd_Ref(MatStash*); 9 /* 10 MatStashCreate_Private - Creates a stash,currently used for all the parallel 11 matrix implementations. The stash is where elements of a matrix destined 12 to be stored on other processors are kept until matrix assembly is done. 13 14 This is a simple minded stash. Simply adds entries to end of stash. 15 16 Input Parameters: 17 comm - communicator, required for scatters. 18 bs - stash block size. used when stashing blocks of values 19 20 Output Parameters: 21 stash - the newly created stash 22 */ 23 #undef __FUNCT__ 24 #define __FUNCT__ "MatStashCreate_Private" 25 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash) 26 { 27 PetscErrorCode ierr; 28 PetscInt max,*opt,nopt,i; 29 PetscBool flg; 30 31 PetscFunctionBegin; 32 /* Require 2 tags,get the second using PetscCommGetNewTag() */ 33 stash->comm = comm; 34 35 ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr); 36 ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); 37 ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); 38 ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); 39 ierr = PetscMalloc1(2*stash->size,&stash->flg_v);CHKERRQ(ierr); 40 for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 41 42 43 nopt = stash->size; 44 ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr); 45 ierr = PetscOptionsGetIntArray(NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 46 if (flg) { 47 if (nopt == 1) max = opt[0]; 48 else if (nopt == stash->size) max = opt[stash->rank]; 49 else if (stash->rank < nopt) max = opt[stash->rank]; 50 else max = 0; /* Use default */ 51 stash->umax = max; 52 } else { 53 stash->umax = 0; 54 } 55 ierr = PetscFree(opt);CHKERRQ(ierr); 56 if (bs <= 0) bs = 1; 57 58 stash->bs = bs; 59 stash->nmax = 0; 60 stash->oldnmax = 0; 61 stash->n = 0; 62 stash->reallocs = -1; 63 stash->space_head = 0; 64 stash->space = 0; 65 66 stash->send_waits = 0; 67 stash->recv_waits = 0; 68 stash->send_status = 0; 69 stash->nsends = 0; 70 stash->nrecvs = 0; 71 stash->svalues = 0; 72 stash->rvalues = 0; 73 stash->rindices = 0; 74 stash->nprocessed = 0; 75 stash->reproduce = PETSC_FALSE; 76 77 ierr = PetscOptionsGetBool(NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr); 78 ierr = PetscOptionsGetBool(NULL,"-matstash_bts",&flg,NULL);CHKERRQ(ierr); 79 if (flg) { 80 SETERRQ(comm,PETSC_ERR_SUP,"Not implemented yet"); 81 } else { 82 stash->ScatterBegin = MatStashScatterBegin_Ref; 83 stash->ScatterGetMesg = MatStashScatterGetMesg_Ref; 84 stash->ScatterEnd = MatStashScatterEnd_Ref; 85 stash->ScatterDestroy = NULL; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 /* 91 MatStashDestroy_Private - Destroy the stash 92 */ 93 #undef __FUNCT__ 94 #define __FUNCT__ "MatStashDestroy_Private" 95 PetscErrorCode MatStashDestroy_Private(MatStash *stash) 96 { 97 PetscErrorCode ierr; 98 99 PetscFunctionBegin; 100 ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 101 if (stash->ScatterDestroy) {ierr = (*stash->ScatterDestroy)(stash);CHKERRQ(ierr);} 102 103 stash->space = 0; 104 105 ierr = PetscFree(stash->flg_v);CHKERRQ(ierr); 106 PetscFunctionReturn(0); 107 } 108 109 /* 110 MatStashScatterEnd_Private - This is called as the final stage of 111 scatter. The final stages of message passing is done here, and 112 all the memory used for message passing is cleaned up. This 113 routine also resets the stash, and deallocates the memory used 114 for the stash. It also keeps track of the current memory usage 115 so that the same value can be used the next time through. 116 */ 117 #undef __FUNCT__ 118 #define __FUNCT__ "MatStashScatterEnd_Private" 119 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash) 120 { 121 PetscErrorCode ierr; 122 123 PetscFunctionBegin; 124 ierr = (*stash->ScatterEnd)(stash);CHKERRQ(ierr); 125 PetscFunctionReturn(0); 126 } 127 128 #undef __FUNCT__ 129 #define __FUNCT__ "MatStashScatterEnd_Ref" 130 static PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash) 131 { 132 PetscErrorCode ierr; 133 PetscInt nsends=stash->nsends,bs2,oldnmax,i; 134 MPI_Status *send_status; 135 136 PetscFunctionBegin; 137 for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 138 /* wait on sends */ 139 if (nsends) { 140 ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr); 141 ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 142 ierr = PetscFree(send_status);CHKERRQ(ierr); 143 } 144 145 /* Now update nmaxold to be app 10% more than max n used, this way the 146 wastage of space is reduced the next time this stash is used. 147 Also update the oldmax, only if it increases */ 148 if (stash->n) { 149 bs2 = stash->bs*stash->bs; 150 oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 151 if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 152 } 153 154 stash->nmax = 0; 155 stash->n = 0; 156 stash->reallocs = -1; 157 stash->nprocessed = 0; 158 159 ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 160 161 stash->space = 0; 162 163 ierr = PetscFree(stash->send_waits);CHKERRQ(ierr); 164 ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr); 165 ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr); 166 ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr); 167 ierr = PetscFree(stash->rvalues);CHKERRQ(ierr); 168 ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr); 169 ierr = PetscFree(stash->rindices);CHKERRQ(ierr); 170 PetscFunctionReturn(0); 171 } 172 173 /* 174 MatStashGetInfo_Private - Gets the relavant statistics of the stash 175 176 Input Parameters: 177 stash - the stash 178 nstash - the size of the stash. Indicates the number of values stored. 179 reallocs - the number of additional mallocs incurred. 180 181 */ 182 #undef __FUNCT__ 183 #define __FUNCT__ "MatStashGetInfo_Private" 184 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs) 185 { 186 PetscInt bs2 = stash->bs*stash->bs; 187 188 PetscFunctionBegin; 189 if (nstash) *nstash = stash->n*bs2; 190 if (reallocs) { 191 if (stash->reallocs < 0) *reallocs = 0; 192 else *reallocs = stash->reallocs; 193 } 194 PetscFunctionReturn(0); 195 } 196 197 /* 198 MatStashSetInitialSize_Private - Sets the initial size of the stash 199 200 Input Parameters: 201 stash - the stash 202 max - the value that is used as the max size of the stash. 203 this value is used while allocating memory. 204 */ 205 #undef __FUNCT__ 206 #define __FUNCT__ "MatStashSetInitialSize_Private" 207 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max) 208 { 209 PetscFunctionBegin; 210 stash->umax = max; 211 PetscFunctionReturn(0); 212 } 213 214 /* MatStashExpand_Private - Expand the stash. This function is called 215 when the space in the stash is not sufficient to add the new values 216 being inserted into the stash. 217 218 Input Parameters: 219 stash - the stash 220 incr - the minimum increase requested 221 222 Notes: 223 This routine doubles the currently used memory. 224 */ 225 #undef __FUNCT__ 226 #define __FUNCT__ "MatStashExpand_Private" 227 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr) 228 { 229 PetscErrorCode ierr; 230 PetscInt newnmax,bs2= stash->bs*stash->bs; 231 232 PetscFunctionBegin; 233 /* allocate a larger stash */ 234 if (!stash->oldnmax && !stash->nmax) { /* new stash */ 235 if (stash->umax) newnmax = stash->umax/bs2; 236 else newnmax = DEFAULT_STASH_SIZE/bs2; 237 } else if (!stash->nmax) { /* resuing stash */ 238 if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 239 else newnmax = stash->oldnmax/bs2; 240 } else newnmax = stash->nmax*2; 241 if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 242 243 /* Get a MatStashSpace and attach it to stash */ 244 ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr); 245 if (!stash->space_head) { /* new stash or resuing stash->oldnmax */ 246 stash->space_head = stash->space; 247 } 248 249 stash->reallocs++; 250 stash->nmax = newnmax; 251 PetscFunctionReturn(0); 252 } 253 /* 254 MatStashValuesRow_Private - inserts values into the stash. This function 255 expects the values to be roworiented. Multiple columns belong to the same row 256 can be inserted with a single call to this function. 257 258 Input Parameters: 259 stash - the stash 260 row - the global row correspoiding to the values 261 n - the number of elements inserted. All elements belong to the above row. 262 idxn - the global column indices corresponding to each of the values. 263 values - the values inserted 264 */ 265 #undef __FUNCT__ 266 #define __FUNCT__ "MatStashValuesRow_Private" 267 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries) 268 { 269 PetscErrorCode ierr; 270 PetscInt i,k,cnt = 0; 271 PetscMatStashSpace space=stash->space; 272 273 PetscFunctionBegin; 274 /* Check and see if we have sufficient memory */ 275 if (!space || space->local_remaining < n) { 276 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 277 } 278 space = stash->space; 279 k = space->local_used; 280 for (i=0; i<n; i++) { 281 if (ignorezeroentries && (values[i] == 0.0)) continue; 282 space->idx[k] = row; 283 space->idy[k] = idxn[i]; 284 space->val[k] = values[i]; 285 k++; 286 cnt++; 287 } 288 stash->n += cnt; 289 space->local_used += cnt; 290 space->local_remaining -= cnt; 291 PetscFunctionReturn(0); 292 } 293 294 /* 295 MatStashValuesCol_Private - inserts values into the stash. This function 296 expects the values to be columnoriented. Multiple columns belong to the same row 297 can be inserted with a single call to this function. 298 299 Input Parameters: 300 stash - the stash 301 row - the global row correspoiding to the values 302 n - the number of elements inserted. All elements belong to the above row. 303 idxn - the global column indices corresponding to each of the values. 304 values - the values inserted 305 stepval - the consecutive values are sepated by a distance of stepval. 306 this happens because the input is columnoriented. 307 */ 308 #undef __FUNCT__ 309 #define __FUNCT__ "MatStashValuesCol_Private" 310 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries) 311 { 312 PetscErrorCode ierr; 313 PetscInt i,k,cnt = 0; 314 PetscMatStashSpace space=stash->space; 315 316 PetscFunctionBegin; 317 /* Check and see if we have sufficient memory */ 318 if (!space || space->local_remaining < n) { 319 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 320 } 321 space = stash->space; 322 k = space->local_used; 323 for (i=0; i<n; i++) { 324 if (ignorezeroentries && (values[i*stepval] == 0.0)) continue; 325 space->idx[k] = row; 326 space->idy[k] = idxn[i]; 327 space->val[k] = values[i*stepval]; 328 k++; 329 cnt++; 330 } 331 stash->n += cnt; 332 space->local_used += cnt; 333 space->local_remaining -= cnt; 334 PetscFunctionReturn(0); 335 } 336 337 /* 338 MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 339 This function expects the values to be roworiented. Multiple columns belong 340 to the same block-row can be inserted with a single call to this function. 341 This function extracts the sub-block of values based on the dimensions of 342 the original input block, and the row,col values corresponding to the blocks. 343 344 Input Parameters: 345 stash - the stash 346 row - the global block-row correspoiding to the values 347 n - the number of elements inserted. All elements belong to the above row. 348 idxn - the global block-column indices corresponding to each of the blocks of 349 values. Each block is of size bs*bs. 350 values - the values inserted 351 rmax - the number of block-rows in the original block. 352 cmax - the number of block-columsn on the original block. 353 idx - the index of the current block-row in the original block. 354 */ 355 #undef __FUNCT__ 356 #define __FUNCT__ "MatStashValuesRowBlocked_Private" 357 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 358 { 359 PetscErrorCode ierr; 360 PetscInt i,j,k,bs2,bs=stash->bs,l; 361 const PetscScalar *vals; 362 PetscScalar *array; 363 PetscMatStashSpace space=stash->space; 364 365 PetscFunctionBegin; 366 if (!space || space->local_remaining < n) { 367 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 368 } 369 space = stash->space; 370 l = space->local_used; 371 bs2 = bs*bs; 372 for (i=0; i<n; i++) { 373 space->idx[l] = row; 374 space->idy[l] = idxn[i]; 375 /* Now copy over the block of values. Store the values column oriented. 376 This enables inserting multiple blocks belonging to a row with a single 377 funtion call */ 378 array = space->val + bs2*l; 379 vals = values + idx*bs2*n + bs*i; 380 for (j=0; j<bs; j++) { 381 for (k=0; k<bs; k++) array[k*bs] = vals[k]; 382 array++; 383 vals += cmax*bs; 384 } 385 l++; 386 } 387 stash->n += n; 388 space->local_used += n; 389 space->local_remaining -= n; 390 PetscFunctionReturn(0); 391 } 392 393 /* 394 MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 395 This function expects the values to be roworiented. Multiple columns belong 396 to the same block-row can be inserted with a single call to this function. 397 This function extracts the sub-block of values based on the dimensions of 398 the original input block, and the row,col values corresponding to the blocks. 399 400 Input Parameters: 401 stash - the stash 402 row - the global block-row correspoiding to the values 403 n - the number of elements inserted. All elements belong to the above row. 404 idxn - the global block-column indices corresponding to each of the blocks of 405 values. Each block is of size bs*bs. 406 values - the values inserted 407 rmax - the number of block-rows in the original block. 408 cmax - the number of block-columsn on the original block. 409 idx - the index of the current block-row in the original block. 410 */ 411 #undef __FUNCT__ 412 #define __FUNCT__ "MatStashValuesColBlocked_Private" 413 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 414 { 415 PetscErrorCode ierr; 416 PetscInt i,j,k,bs2,bs=stash->bs,l; 417 const PetscScalar *vals; 418 PetscScalar *array; 419 PetscMatStashSpace space=stash->space; 420 421 PetscFunctionBegin; 422 if (!space || space->local_remaining < n) { 423 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 424 } 425 space = stash->space; 426 l = space->local_used; 427 bs2 = bs*bs; 428 for (i=0; i<n; i++) { 429 space->idx[l] = row; 430 space->idy[l] = idxn[i]; 431 /* Now copy over the block of values. Store the values column oriented. 432 This enables inserting multiple blocks belonging to a row with a single 433 funtion call */ 434 array = space->val + bs2*l; 435 vals = values + idx*bs2*n + bs*i; 436 for (j=0; j<bs; j++) { 437 for (k=0; k<bs; k++) array[k] = vals[k]; 438 array += bs; 439 vals += rmax*bs; 440 } 441 l++; 442 } 443 stash->n += n; 444 space->local_used += n; 445 space->local_remaining -= n; 446 PetscFunctionReturn(0); 447 } 448 /* 449 MatStashScatterBegin_Private - Initiates the transfer of values to the 450 correct owners. This function goes through the stash, and check the 451 owners of each stashed value, and sends the values off to the owner 452 processors. 453 454 Input Parameters: 455 stash - the stash 456 owners - an array of size 'no-of-procs' which gives the ownership range 457 for each node. 458 459 Notes: The 'owners' array in the cased of the blocked-stash has the 460 ranges specified blocked global indices, and for the regular stash in 461 the proper global indices. 462 */ 463 #undef __FUNCT__ 464 #define __FUNCT__ "MatStashScatterBegin_Private" 465 PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners) 466 { 467 PetscErrorCode ierr; 468 469 PetscFunctionBegin; 470 ierr = (*stash->ScatterBegin)(mat,stash,owners);CHKERRQ(ierr); 471 PetscFunctionReturn(0); 472 } 473 474 #undef __FUNCT__ 475 #define __FUNCT__ "MatStashScatterBegin_Ref" 476 static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners) 477 { 478 PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 479 PetscInt size=stash->size,nsends; 480 PetscErrorCode ierr; 481 PetscInt count,*sindices,**rindices,i,j,idx,lastidx,l; 482 PetscScalar **rvalues,*svalues; 483 MPI_Comm comm = stash->comm; 484 MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 485 PetscMPIInt *sizes,*nlengths,nreceives; 486 PetscInt *sp_idx,*sp_idy; 487 PetscScalar *sp_val; 488 PetscMatStashSpace space,space_next; 489 490 PetscFunctionBegin; 491 bs2 = stash->bs*stash->bs; 492 493 /* first count number of contributors to each processor */ 494 ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr); 495 ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr); 496 ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr); 497 498 i = j = 0; 499 lastidx = -1; 500 space = stash->space_head; 501 while (space != NULL) { 502 space_next = space->next; 503 sp_idx = space->idx; 504 for (l=0; l<space->local_used; l++) { 505 /* if indices are NOT locally sorted, need to start search at the beginning */ 506 if (lastidx > (idx = sp_idx[l])) j = 0; 507 lastidx = idx; 508 for (; j<size; j++) { 509 if (idx >= owners[j] && idx < owners[j+1]) { 510 nlengths[j]++; owner[i] = j; break; 511 } 512 } 513 i++; 514 } 515 space = space_next; 516 } 517 /* Now check what procs get messages - and compute nsends. */ 518 for (i=0, nsends=0; i<size; i++) { 519 if (nlengths[i]) { 520 sizes[i] = 1; nsends++; 521 } 522 } 523 524 {PetscMPIInt *onodes,*olengths; 525 /* Determine the number of messages to expect, their lengths, from from-ids */ 526 ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr); 527 ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 528 /* since clubbing row,col - lengths are multiplied by 2 */ 529 for (i=0; i<nreceives; i++) olengths[i] *=2; 530 ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 531 /* values are size 'bs2' lengths (and remove earlier factor 2 */ 532 for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 533 ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 534 ierr = PetscFree(onodes);CHKERRQ(ierr); 535 ierr = PetscFree(olengths);CHKERRQ(ierr);} 536 537 /* do sends: 538 1) starts[i] gives the starting index in svalues for stuff going to 539 the ith processor 540 */ 541 ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr); 542 ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr); 543 ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr); 544 /* use 2 sends the first with all_a, the next with all_i and all_j */ 545 startv[0] = 0; starti[0] = 0; 546 for (i=1; i<size; i++) { 547 startv[i] = startv[i-1] + nlengths[i-1]; 548 starti[i] = starti[i-1] + 2*nlengths[i-1]; 549 } 550 551 i = 0; 552 space = stash->space_head; 553 while (space != NULL) { 554 space_next = space->next; 555 sp_idx = space->idx; 556 sp_idy = space->idy; 557 sp_val = space->val; 558 for (l=0; l<space->local_used; l++) { 559 j = owner[i]; 560 if (bs2 == 1) { 561 svalues[startv[j]] = sp_val[l]; 562 } else { 563 PetscInt k; 564 PetscScalar *buf1,*buf2; 565 buf1 = svalues+bs2*startv[j]; 566 buf2 = space->val + bs2*l; 567 for (k=0; k<bs2; k++) buf1[k] = buf2[k]; 568 } 569 sindices[starti[j]] = sp_idx[l]; 570 sindices[starti[j]+nlengths[j]] = sp_idy[l]; 571 startv[j]++; 572 starti[j]++; 573 i++; 574 } 575 space = space_next; 576 } 577 startv[0] = 0; 578 for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1]; 579 580 for (i=0,count=0; i<size; i++) { 581 if (sizes[i]) { 582 ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr); 583 ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr); 584 } 585 } 586 #if defined(PETSC_USE_INFO) 587 ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr); 588 for (i=0; i<size; i++) { 589 if (sizes[i]) { 590 ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 591 } 592 } 593 #endif 594 ierr = PetscFree(nlengths);CHKERRQ(ierr); 595 ierr = PetscFree(owner);CHKERRQ(ierr); 596 ierr = PetscFree2(startv,starti);CHKERRQ(ierr); 597 ierr = PetscFree(sizes);CHKERRQ(ierr); 598 599 /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 600 ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr); 601 602 for (i=0; i<nreceives; i++) { 603 recv_waits[2*i] = recv_waits1[i]; 604 recv_waits[2*i+1] = recv_waits2[i]; 605 } 606 stash->recv_waits = recv_waits; 607 608 ierr = PetscFree(recv_waits1);CHKERRQ(ierr); 609 ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 610 611 stash->svalues = svalues; 612 stash->sindices = sindices; 613 stash->rvalues = rvalues; 614 stash->rindices = rindices; 615 stash->send_waits = send_waits; 616 stash->nsends = nsends; 617 stash->nrecvs = nreceives; 618 stash->reproduce_count = 0; 619 PetscFunctionReturn(0); 620 } 621 622 /* 623 MatStashScatterGetMesg_Private - This function waits on the receives posted 624 in the function MatStashScatterBegin_Private() and returns one message at 625 a time to the calling function. If no messages are left, it indicates this 626 by setting flg = 0, else it sets flg = 1. 627 628 Input Parameters: 629 stash - the stash 630 631 Output Parameters: 632 nvals - the number of entries in the current message. 633 rows - an array of row indices (or blocked indices) corresponding to the values 634 cols - an array of columnindices (or blocked indices) corresponding to the values 635 vals - the values 636 flg - 0 indicates no more message left, and the current call has no values associated. 637 1 indicates that the current call successfully received a message, and the 638 other output parameters nvals,rows,cols,vals are set appropriately. 639 */ 640 #undef __FUNCT__ 641 #define __FUNCT__ "MatStashScatterGetMesg_Private" 642 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg) 643 { 644 PetscErrorCode ierr; 645 646 PetscFunctionBegin; 647 ierr = (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);CHKERRQ(ierr); 648 PetscFunctionReturn(0); 649 } 650 651 #undef __FUNCT__ 652 #define __FUNCT__ "MatStashScatterGetMesg_Ref" 653 static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg) 654 { 655 PetscErrorCode ierr; 656 PetscMPIInt i,*flg_v = stash->flg_v,i1,i2; 657 PetscInt bs2; 658 MPI_Status recv_status; 659 PetscBool match_found = PETSC_FALSE; 660 661 PetscFunctionBegin; 662 *flg = 0; /* When a message is discovered this is reset to 1 */ 663 /* Return if no more messages to process */ 664 if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0); 665 666 bs2 = stash->bs*stash->bs; 667 /* If a matching pair of receives are found, process them, and return the data to 668 the calling function. Until then keep receiving messages */ 669 while (!match_found) { 670 if (stash->reproduce) { 671 i = stash->reproduce_count++; 672 ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr); 673 } else { 674 ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 675 } 676 if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!"); 677 678 /* Now pack the received message into a structure which is usable by others */ 679 if (i % 2) { 680 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr); 681 682 flg_v[2*recv_status.MPI_SOURCE] = i/2; 683 684 *nvals = *nvals/bs2; 685 } else { 686 ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr); 687 688 flg_v[2*recv_status.MPI_SOURCE+1] = i/2; 689 690 *nvals = *nvals/2; /* This message has both row indices and col indices */ 691 } 692 693 /* Check if we have both messages from this proc */ 694 i1 = flg_v[2*recv_status.MPI_SOURCE]; 695 i2 = flg_v[2*recv_status.MPI_SOURCE+1]; 696 if (i1 != -1 && i2 != -1) { 697 *rows = stash->rindices[i2]; 698 *cols = *rows + *nvals; 699 *vals = stash->rvalues[i1]; 700 *flg = 1; 701 stash->nprocessed++; 702 match_found = PETSC_TRUE; 703 } 704 } 705 PetscFunctionReturn(0); 706 } 707