1 2 #include <petsc/private/matimpl.h> 3 4 #define DEFAULT_STASH_SIZE 10000 5 6 static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*); 7 static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); 8 static PetscErrorCode MatStashScatterEnd_Ref(MatStash*); 9 static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*); 10 static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); 11 static PetscErrorCode MatStashScatterEnd_BTS(MatStash*); 12 static PetscErrorCode MatStashScatterDestroy_BTS(MatStash*); 13 14 /* 15 MatStashCreate_Private - Creates a stash,currently used for all the parallel 16 matrix implementations. The stash is where elements of a matrix destined 17 to be stored on other processors are kept until matrix assembly is done. 18 19 This is a simple minded stash. Simply adds entries to end of stash. 20 21 Input Parameters: 22 comm - communicator, required for scatters. 23 bs - stash block size. used when stashing blocks of values 24 25 Output Parameters: 26 stash - the newly created stash 27 */ 28 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash) 29 { 30 PetscErrorCode ierr; 31 PetscInt max,*opt,nopt,i; 32 PetscBool flg; 33 34 PetscFunctionBegin; 35 /* Require 2 tags,get the second using PetscCommGetNewTag() */ 36 stash->comm = comm; 37 38 ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr); 39 ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); 40 ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); 41 ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); 42 ierr = PetscMalloc1(2*stash->size,&stash->flg_v);CHKERRQ(ierr); 43 for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 44 45 46 nopt = stash->size; 47 ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr); 48 ierr = PetscOptionsGetIntArray(NULL,NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 49 if (flg) { 50 if (nopt == 1) max = opt[0]; 51 else if (nopt == stash->size) max = opt[stash->rank]; 52 else if (stash->rank < nopt) max = opt[stash->rank]; 53 else max = 0; /* Use default */ 54 stash->umax = max; 55 } else { 56 stash->umax = 0; 57 } 58 ierr = PetscFree(opt);CHKERRQ(ierr); 59 if (bs <= 0) bs = 1; 60 61 stash->bs = bs; 62 stash->nmax = 0; 63 stash->oldnmax = 0; 64 stash->n = 0; 65 stash->reallocs = -1; 66 stash->space_head = 0; 67 stash->space = 0; 68 69 stash->send_waits = 0; 70 stash->recv_waits = 0; 71 stash->send_status = 0; 72 stash->nsends = 0; 73 stash->nrecvs = 0; 74 stash->svalues = 0; 75 stash->rvalues = 0; 76 stash->rindices = 0; 77 stash->nprocessed = 0; 78 stash->reproduce = PETSC_FALSE; 79 stash->blocktype = MPI_DATATYPE_NULL; 80 81 ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr); 82 #if !defined(PETSC_HAVE_MPIUNI) 83 ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_legacy",&flg,NULL);CHKERRQ(ierr); 84 if (!flg) { 85 stash->ScatterBegin = MatStashScatterBegin_BTS; 86 stash->ScatterGetMesg = MatStashScatterGetMesg_BTS; 87 stash->ScatterEnd = MatStashScatterEnd_BTS; 88 stash->ScatterDestroy = MatStashScatterDestroy_BTS; 89 } else { 90 #endif 91 stash->ScatterBegin = MatStashScatterBegin_Ref; 92 stash->ScatterGetMesg = MatStashScatterGetMesg_Ref; 93 stash->ScatterEnd = MatStashScatterEnd_Ref; 94 stash->ScatterDestroy = NULL; 95 #if !defined(PETSC_HAVE_MPIUNI) 96 } 97 #endif 98 PetscFunctionReturn(0); 99 } 100 101 /* 102 MatStashDestroy_Private - Destroy the stash 103 */ 104 PetscErrorCode MatStashDestroy_Private(MatStash *stash) 105 { 106 PetscErrorCode ierr; 107 108 PetscFunctionBegin; 109 ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 110 if (stash->ScatterDestroy) {ierr = (*stash->ScatterDestroy)(stash);CHKERRQ(ierr);} 111 112 stash->space = 0; 113 114 ierr = PetscFree(stash->flg_v);CHKERRQ(ierr); 115 PetscFunctionReturn(0); 116 } 117 118 /* 119 MatStashScatterEnd_Private - This is called as the final stage of 120 scatter. The final stages of message passing is done here, and 121 all the memory used for message passing is cleaned up. This 122 routine also resets the stash, and deallocates the memory used 123 for the stash. It also keeps track of the current memory usage 124 so that the same value can be used the next time through. 125 */ 126 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash) 127 { 128 PetscErrorCode ierr; 129 130 PetscFunctionBegin; 131 ierr = (*stash->ScatterEnd)(stash);CHKERRQ(ierr); 132 PetscFunctionReturn(0); 133 } 134 135 static PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash) 136 { 137 PetscErrorCode ierr; 138 PetscInt nsends=stash->nsends,bs2,oldnmax,i; 139 MPI_Status *send_status; 140 141 PetscFunctionBegin; 142 for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 143 /* wait on sends */ 144 if (nsends) { 145 ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr); 146 ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 147 ierr = PetscFree(send_status);CHKERRQ(ierr); 148 } 149 150 /* Now update nmaxold to be app 10% more than max n used, this way the 151 wastage of space is reduced the next time this stash is used. 152 Also update the oldmax, only if it increases */ 153 if (stash->n) { 154 bs2 = stash->bs*stash->bs; 155 oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 156 if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 157 } 158 159 stash->nmax = 0; 160 stash->n = 0; 161 stash->reallocs = -1; 162 stash->nprocessed = 0; 163 164 ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 165 166 stash->space = 0; 167 168 ierr = PetscFree(stash->send_waits);CHKERRQ(ierr); 169 ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr); 170 ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr); 171 ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr); 172 ierr = PetscFree(stash->rvalues);CHKERRQ(ierr); 173 ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr); 174 ierr = PetscFree(stash->rindices);CHKERRQ(ierr); 175 PetscFunctionReturn(0); 176 } 177 178 /* 179 MatStashGetInfo_Private - Gets the relavant statistics of the stash 180 181 Input Parameters: 182 stash - the stash 183 nstash - the size of the stash. Indicates the number of values stored. 184 reallocs - the number of additional mallocs incurred. 185 186 */ 187 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs) 188 { 189 PetscInt bs2 = stash->bs*stash->bs; 190 191 PetscFunctionBegin; 192 if (nstash) *nstash = stash->n*bs2; 193 if (reallocs) { 194 if (stash->reallocs < 0) *reallocs = 0; 195 else *reallocs = stash->reallocs; 196 } 197 PetscFunctionReturn(0); 198 } 199 200 /* 201 MatStashSetInitialSize_Private - Sets the initial size of the stash 202 203 Input Parameters: 204 stash - the stash 205 max - the value that is used as the max size of the stash. 206 this value is used while allocating memory. 207 */ 208 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max) 209 { 210 PetscFunctionBegin; 211 stash->umax = max; 212 PetscFunctionReturn(0); 213 } 214 215 /* MatStashExpand_Private - Expand the stash. This function is called 216 when the space in the stash is not sufficient to add the new values 217 being inserted into the stash. 218 219 Input Parameters: 220 stash - the stash 221 incr - the minimum increase requested 222 223 Notes: 224 This routine doubles the currently used memory. 225 */ 226 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr) 227 { 228 PetscErrorCode ierr; 229 PetscInt newnmax,bs2= stash->bs*stash->bs; 230 231 PetscFunctionBegin; 232 /* allocate a larger stash */ 233 if (!stash->oldnmax && !stash->nmax) { /* new stash */ 234 if (stash->umax) newnmax = stash->umax/bs2; 235 else newnmax = DEFAULT_STASH_SIZE/bs2; 236 } else if (!stash->nmax) { /* resuing stash */ 237 if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 238 else newnmax = stash->oldnmax/bs2; 239 } else newnmax = stash->nmax*2; 240 if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 241 242 /* Get a MatStashSpace and attach it to stash */ 243 ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr); 244 if (!stash->space_head) { /* new stash or resuing stash->oldnmax */ 245 stash->space_head = stash->space; 246 } 247 248 stash->reallocs++; 249 stash->nmax = newnmax; 250 PetscFunctionReturn(0); 251 } 252 /* 253 MatStashValuesRow_Private - inserts values into the stash. This function 254 expects the values to be roworiented. Multiple columns belong to the same row 255 can be inserted with a single call to this function. 256 257 Input Parameters: 258 stash - the stash 259 row - the global row correspoiding to the values 260 n - the number of elements inserted. All elements belong to the above row. 261 idxn - the global column indices corresponding to each of the values. 262 values - the values inserted 263 */ 264 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries) 265 { 266 PetscErrorCode ierr; 267 PetscInt i,k,cnt = 0; 268 PetscMatStashSpace space=stash->space; 269 270 PetscFunctionBegin; 271 /* Check and see if we have sufficient memory */ 272 if (!space || space->local_remaining < n) { 273 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 274 } 275 space = stash->space; 276 k = space->local_used; 277 for (i=0; i<n; i++) { 278 if (ignorezeroentries && (values[i] == 0.0)) continue; 279 space->idx[k] = row; 280 space->idy[k] = idxn[i]; 281 space->val[k] = values[i]; 282 k++; 283 cnt++; 284 } 285 stash->n += cnt; 286 space->local_used += cnt; 287 space->local_remaining -= cnt; 288 PetscFunctionReturn(0); 289 } 290 291 /* 292 MatStashValuesCol_Private - inserts values into the stash. This function 293 expects the values to be columnoriented. Multiple columns belong to the same row 294 can be inserted with a single call to this function. 295 296 Input Parameters: 297 stash - the stash 298 row - the global row correspoiding to the values 299 n - the number of elements inserted. All elements belong to the above row. 300 idxn - the global column indices corresponding to each of the values. 301 values - the values inserted 302 stepval - the consecutive values are sepated by a distance of stepval. 303 this happens because the input is columnoriented. 304 */ 305 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries) 306 { 307 PetscErrorCode ierr; 308 PetscInt i,k,cnt = 0; 309 PetscMatStashSpace space=stash->space; 310 311 PetscFunctionBegin; 312 /* Check and see if we have sufficient memory */ 313 if (!space || space->local_remaining < n) { 314 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 315 } 316 space = stash->space; 317 k = space->local_used; 318 for (i=0; i<n; i++) { 319 if (ignorezeroentries && (values[i*stepval] == 0.0)) continue; 320 space->idx[k] = row; 321 space->idy[k] = idxn[i]; 322 space->val[k] = values[i*stepval]; 323 k++; 324 cnt++; 325 } 326 stash->n += cnt; 327 space->local_used += cnt; 328 space->local_remaining -= cnt; 329 PetscFunctionReturn(0); 330 } 331 332 /* 333 MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 334 This function expects the values to be roworiented. Multiple columns belong 335 to the same block-row can be inserted with a single call to this function. 336 This function extracts the sub-block of values based on the dimensions of 337 the original input block, and the row,col values corresponding to the blocks. 338 339 Input Parameters: 340 stash - the stash 341 row - the global block-row correspoiding to the values 342 n - the number of elements inserted. All elements belong to the above row. 343 idxn - the global block-column indices corresponding to each of the blocks of 344 values. Each block is of size bs*bs. 345 values - the values inserted 346 rmax - the number of block-rows in the original block. 347 cmax - the number of block-columsn on the original block. 348 idx - the index of the current block-row in the original block. 349 */ 350 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 351 { 352 PetscErrorCode ierr; 353 PetscInt i,j,k,bs2,bs=stash->bs,l; 354 const PetscScalar *vals; 355 PetscScalar *array; 356 PetscMatStashSpace space=stash->space; 357 358 PetscFunctionBegin; 359 if (!space || space->local_remaining < n) { 360 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 361 } 362 space = stash->space; 363 l = space->local_used; 364 bs2 = bs*bs; 365 for (i=0; i<n; i++) { 366 space->idx[l] = row; 367 space->idy[l] = idxn[i]; 368 /* Now copy over the block of values. Store the values column oriented. 369 This enables inserting multiple blocks belonging to a row with a single 370 funtion call */ 371 array = space->val + bs2*l; 372 vals = values + idx*bs2*n + bs*i; 373 for (j=0; j<bs; j++) { 374 for (k=0; k<bs; k++) array[k*bs] = vals[k]; 375 array++; 376 vals += cmax*bs; 377 } 378 l++; 379 } 380 stash->n += n; 381 space->local_used += n; 382 space->local_remaining -= n; 383 PetscFunctionReturn(0); 384 } 385 386 /* 387 MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 388 This function expects the values to be roworiented. Multiple columns belong 389 to the same block-row can be inserted with a single call to this function. 390 This function extracts the sub-block of values based on the dimensions of 391 the original input block, and the row,col values corresponding to the blocks. 392 393 Input Parameters: 394 stash - the stash 395 row - the global block-row correspoiding to the values 396 n - the number of elements inserted. All elements belong to the above row. 397 idxn - the global block-column indices corresponding to each of the blocks of 398 values. Each block is of size bs*bs. 399 values - the values inserted 400 rmax - the number of block-rows in the original block. 401 cmax - the number of block-columsn on the original block. 402 idx - the index of the current block-row in the original block. 403 */ 404 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 405 { 406 PetscErrorCode ierr; 407 PetscInt i,j,k,bs2,bs=stash->bs,l; 408 const PetscScalar *vals; 409 PetscScalar *array; 410 PetscMatStashSpace space=stash->space; 411 412 PetscFunctionBegin; 413 if (!space || space->local_remaining < n) { 414 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 415 } 416 space = stash->space; 417 l = space->local_used; 418 bs2 = bs*bs; 419 for (i=0; i<n; i++) { 420 space->idx[l] = row; 421 space->idy[l] = idxn[i]; 422 /* Now copy over the block of values. Store the values column oriented. 423 This enables inserting multiple blocks belonging to a row with a single 424 funtion call */ 425 array = space->val + bs2*l; 426 vals = values + idx*bs2*n + bs*i; 427 for (j=0; j<bs; j++) { 428 for (k=0; k<bs; k++) array[k] = vals[k]; 429 array += bs; 430 vals += rmax*bs; 431 } 432 l++; 433 } 434 stash->n += n; 435 space->local_used += n; 436 space->local_remaining -= n; 437 PetscFunctionReturn(0); 438 } 439 /* 440 MatStashScatterBegin_Private - Initiates the transfer of values to the 441 correct owners. This function goes through the stash, and check the 442 owners of each stashed value, and sends the values off to the owner 443 processors. 444 445 Input Parameters: 446 stash - the stash 447 owners - an array of size 'no-of-procs' which gives the ownership range 448 for each node. 449 450 Notes: The 'owners' array in the cased of the blocked-stash has the 451 ranges specified blocked global indices, and for the regular stash in 452 the proper global indices. 453 */ 454 PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners) 455 { 456 PetscErrorCode ierr; 457 458 PetscFunctionBegin; 459 ierr = (*stash->ScatterBegin)(mat,stash,owners);CHKERRQ(ierr); 460 PetscFunctionReturn(0); 461 } 462 463 static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners) 464 { 465 PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 466 PetscInt size=stash->size,nsends; 467 PetscErrorCode ierr; 468 PetscInt count,*sindices,**rindices,i,j,idx,lastidx,l; 469 PetscScalar **rvalues,*svalues; 470 MPI_Comm comm = stash->comm; 471 MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 472 PetscMPIInt *sizes,*nlengths,nreceives; 473 PetscInt *sp_idx,*sp_idy; 474 PetscScalar *sp_val; 475 PetscMatStashSpace space,space_next; 476 477 PetscFunctionBegin; 478 { /* make sure all processors are either in INSERTMODE or ADDMODE */ 479 InsertMode addv; 480 ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 481 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 482 mat->insertmode = addv; /* in case this processor had no cache */ 483 } 484 485 bs2 = stash->bs*stash->bs; 486 487 /* first count number of contributors to each processor */ 488 ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr); 489 ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr); 490 ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr); 491 492 i = j = 0; 493 lastidx = -1; 494 space = stash->space_head; 495 while (space) { 496 space_next = space->next; 497 sp_idx = space->idx; 498 for (l=0; l<space->local_used; l++) { 499 /* if indices are NOT locally sorted, need to start search at the beginning */ 500 if (lastidx > (idx = sp_idx[l])) j = 0; 501 lastidx = idx; 502 for (; j<size; j++) { 503 if (idx >= owners[j] && idx < owners[j+1]) { 504 nlengths[j]++; owner[i] = j; break; 505 } 506 } 507 i++; 508 } 509 space = space_next; 510 } 511 /* Now check what procs get messages - and compute nsends. */ 512 for (i=0, nsends=0; i<size; i++) { 513 if (nlengths[i]) { 514 sizes[i] = 1; nsends++; 515 } 516 } 517 518 {PetscMPIInt *onodes,*olengths; 519 /* Determine the number of messages to expect, their lengths, from from-ids */ 520 ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr); 521 ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 522 /* since clubbing row,col - lengths are multiplied by 2 */ 523 for (i=0; i<nreceives; i++) olengths[i] *=2; 524 ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 525 /* values are size 'bs2' lengths (and remove earlier factor 2 */ 526 for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 527 ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 528 ierr = PetscFree(onodes);CHKERRQ(ierr); 529 ierr = PetscFree(olengths);CHKERRQ(ierr);} 530 531 /* do sends: 532 1) starts[i] gives the starting index in svalues for stuff going to 533 the ith processor 534 */ 535 ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr); 536 ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr); 537 ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr); 538 /* use 2 sends the first with all_a, the next with all_i and all_j */ 539 startv[0] = 0; starti[0] = 0; 540 for (i=1; i<size; i++) { 541 startv[i] = startv[i-1] + nlengths[i-1]; 542 starti[i] = starti[i-1] + 2*nlengths[i-1]; 543 } 544 545 i = 0; 546 space = stash->space_head; 547 while (space) { 548 space_next = space->next; 549 sp_idx = space->idx; 550 sp_idy = space->idy; 551 sp_val = space->val; 552 for (l=0; l<space->local_used; l++) { 553 j = owner[i]; 554 if (bs2 == 1) { 555 svalues[startv[j]] = sp_val[l]; 556 } else { 557 PetscInt k; 558 PetscScalar *buf1,*buf2; 559 buf1 = svalues+bs2*startv[j]; 560 buf2 = space->val + bs2*l; 561 for (k=0; k<bs2; k++) buf1[k] = buf2[k]; 562 } 563 sindices[starti[j]] = sp_idx[l]; 564 sindices[starti[j]+nlengths[j]] = sp_idy[l]; 565 startv[j]++; 566 starti[j]++; 567 i++; 568 } 569 space = space_next; 570 } 571 startv[0] = 0; 572 for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1]; 573 574 for (i=0,count=0; i<size; i++) { 575 if (sizes[i]) { 576 ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr); 577 ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr); 578 } 579 } 580 #if defined(PETSC_USE_INFO) 581 ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr); 582 for (i=0; i<size; i++) { 583 if (sizes[i]) { 584 ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 585 } 586 } 587 #endif 588 ierr = PetscFree(nlengths);CHKERRQ(ierr); 589 ierr = PetscFree(owner);CHKERRQ(ierr); 590 ierr = PetscFree2(startv,starti);CHKERRQ(ierr); 591 ierr = PetscFree(sizes);CHKERRQ(ierr); 592 593 /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 594 ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr); 595 596 for (i=0; i<nreceives; i++) { 597 recv_waits[2*i] = recv_waits1[i]; 598 recv_waits[2*i+1] = recv_waits2[i]; 599 } 600 stash->recv_waits = recv_waits; 601 602 ierr = PetscFree(recv_waits1);CHKERRQ(ierr); 603 ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 604 605 stash->svalues = svalues; 606 stash->sindices = sindices; 607 stash->rvalues = rvalues; 608 stash->rindices = rindices; 609 stash->send_waits = send_waits; 610 stash->nsends = nsends; 611 stash->nrecvs = nreceives; 612 stash->reproduce_count = 0; 613 PetscFunctionReturn(0); 614 } 615 616 /* 617 MatStashScatterGetMesg_Private - This function waits on the receives posted 618 in the function MatStashScatterBegin_Private() and returns one message at 619 a time to the calling function. If no messages are left, it indicates this 620 by setting flg = 0, else it sets flg = 1. 621 622 Input Parameters: 623 stash - the stash 624 625 Output Parameters: 626 nvals - the number of entries in the current message. 627 rows - an array of row indices (or blocked indices) corresponding to the values 628 cols - an array of columnindices (or blocked indices) corresponding to the values 629 vals - the values 630 flg - 0 indicates no more message left, and the current call has no values associated. 631 1 indicates that the current call successfully received a message, and the 632 other output parameters nvals,rows,cols,vals are set appropriately. 633 */ 634 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg) 635 { 636 PetscErrorCode ierr; 637 638 PetscFunctionBegin; 639 ierr = (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);CHKERRQ(ierr); 640 PetscFunctionReturn(0); 641 } 642 643 static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg) 644 { 645 PetscErrorCode ierr; 646 PetscMPIInt i,*flg_v = stash->flg_v,i1,i2; 647 PetscInt bs2; 648 MPI_Status recv_status; 649 PetscBool match_found = PETSC_FALSE; 650 651 PetscFunctionBegin; 652 *flg = 0; /* When a message is discovered this is reset to 1 */ 653 /* Return if no more messages to process */ 654 if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0); 655 656 bs2 = stash->bs*stash->bs; 657 /* If a matching pair of receives are found, process them, and return the data to 658 the calling function. Until then keep receiving messages */ 659 while (!match_found) { 660 if (stash->reproduce) { 661 i = stash->reproduce_count++; 662 ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr); 663 } else { 664 ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 665 } 666 if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!"); 667 668 /* Now pack the received message into a structure which is usable by others */ 669 if (i % 2) { 670 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr); 671 672 flg_v[2*recv_status.MPI_SOURCE] = i/2; 673 674 *nvals = *nvals/bs2; 675 } else { 676 ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr); 677 678 flg_v[2*recv_status.MPI_SOURCE+1] = i/2; 679 680 *nvals = *nvals/2; /* This message has both row indices and col indices */ 681 } 682 683 /* Check if we have both messages from this proc */ 684 i1 = flg_v[2*recv_status.MPI_SOURCE]; 685 i2 = flg_v[2*recv_status.MPI_SOURCE+1]; 686 if (i1 != -1 && i2 != -1) { 687 *rows = stash->rindices[i2]; 688 *cols = *rows + *nvals; 689 *vals = stash->rvalues[i1]; 690 *flg = 1; 691 stash->nprocessed++; 692 match_found = PETSC_TRUE; 693 } 694 } 695 PetscFunctionReturn(0); 696 } 697 698 typedef struct { 699 PetscInt row; 700 PetscInt col; 701 PetscScalar vals[1]; /* Actually an array of length bs2 */ 702 } MatStashBlock; 703 704 static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode) 705 { 706 PetscErrorCode ierr; 707 PetscMatStashSpace space; 708 PetscInt n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i; 709 PetscScalar **valptr; 710 711 PetscFunctionBegin; 712 ierr = PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);CHKERRQ(ierr); 713 for (space=stash->space_head,cnt=0; space; space=space->next) { 714 for (i=0; i<space->local_used; i++) { 715 row[cnt] = space->idx[i]; 716 col[cnt] = space->idy[i]; 717 valptr[cnt] = &space->val[i*bs2]; 718 perm[cnt] = cnt; /* Will tell us where to find valptr after sorting row[] and col[] */ 719 cnt++; 720 } 721 } 722 if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt); 723 ierr = PetscSortIntWithArrayPair(n,row,col,perm);CHKERRQ(ierr); 724 /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */ 725 for (rowstart=0,cnt=0,i=1; i<=n; i++) { 726 if (i == n || row[i] != row[rowstart]) { /* Sort the last row. */ 727 PetscInt colstart; 728 ierr = PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);CHKERRQ(ierr); 729 for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */ 730 PetscInt j,l; 731 MatStashBlock *block; 732 ierr = PetscSegBufferGet(stash->segsendblocks,1,&block);CHKERRQ(ierr); 733 block->row = row[rowstart]; 734 block->col = col[colstart]; 735 ierr = PetscMemcpy(block->vals,valptr[perm[colstart]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr); 736 for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */ 737 if (insertmode == ADD_VALUES) { 738 for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l]; 739 } else { 740 ierr = PetscMemcpy(block->vals,valptr[perm[j]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr); 741 } 742 } 743 colstart = j; 744 } 745 rowstart = i; 746 } 747 } 748 ierr = PetscFree4(row,col,valptr,perm);CHKERRQ(ierr); 749 PetscFunctionReturn(0); 750 } 751 752 static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash) 753 { 754 PetscErrorCode ierr; 755 756 PetscFunctionBegin; 757 if (stash->blocktype == MPI_DATATYPE_NULL) { 758 PetscInt bs2 = PetscSqr(stash->bs); 759 PetscMPIInt blocklens[2]; 760 MPI_Aint displs[2]; 761 MPI_Datatype types[2],stype; 762 /* C++ std::complex is not my favorite datatype. Since it is not POD, we cannot use offsetof to find the offset of 763 * vals. But the layout is actually guaranteed by the standard, so we do a little dance here with struct 764 * DummyBlock, substituting PetscReal for PetscComplex so that we can determine the offset. 765 */ 766 struct DummyBlock {PetscInt row,col; PetscReal vals;}; 767 768 stash->blocktype_size = offsetof(struct DummyBlock,vals) + bs2*sizeof(PetscScalar); 769 if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */ 770 stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt); 771 } 772 ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);CHKERRQ(ierr); 773 ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);CHKERRQ(ierr); 774 ierr = PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);CHKERRQ(ierr); 775 blocklens[0] = 2; 776 blocklens[1] = bs2; 777 displs[0] = offsetof(struct DummyBlock,row); 778 displs[1] = offsetof(struct DummyBlock,vals); 779 types[0] = MPIU_INT; 780 types[1] = MPIU_SCALAR; 781 ierr = MPI_Type_create_struct(2,blocklens,displs,types,&stype);CHKERRQ(ierr); 782 ierr = MPI_Type_commit(&stype);CHKERRQ(ierr); 783 ierr = MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype);CHKERRQ(ierr); /* MPI-2 */ 784 ierr = MPI_Type_commit(&stash->blocktype);CHKERRQ(ierr); 785 ierr = MPI_Type_free(&stype);CHKERRQ(ierr); 786 } 787 PetscFunctionReturn(0); 788 } 789 790 /* Callback invoked after target rank has initiatied receive of rendezvous message. 791 * Here we post the main sends. 792 */ 793 static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx) 794 { 795 MatStash *stash = (MatStash*)ctx; 796 MatStashHeader *hdr = (MatStashHeader*)sdata; 797 PetscErrorCode ierr; 798 799 PetscFunctionBegin; 800 if (rank != stash->sendranks[rankid]) SETERRQ3(comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]); 801 ierr = MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr); 802 stash->sendframes[rankid].count = hdr->count; 803 stash->sendframes[rankid].pending = 1; 804 PetscFunctionReturn(0); 805 } 806 807 /* Callback invoked by target after receiving rendezvous message. 808 * Here we post the main recvs. 809 */ 810 static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx) 811 { 812 MatStash *stash = (MatStash*)ctx; 813 MatStashHeader *hdr = (MatStashHeader*)rdata; 814 MatStashFrame *frame; 815 PetscErrorCode ierr; 816 817 PetscFunctionBegin; 818 ierr = PetscSegBufferGet(stash->segrecvframe,1,&frame);CHKERRQ(ierr); 819 ierr = PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);CHKERRQ(ierr); 820 ierr = MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr); 821 frame->count = hdr->count; 822 frame->pending = 1; 823 PetscFunctionReturn(0); 824 } 825 826 #if !defined(PETSC_HAVE_MPIUNI) 827 /* 828 * owners[] contains the ownership ranges; may be indexed by either blocks or scalars 829 */ 830 static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[]) 831 { 832 PetscErrorCode ierr; 833 size_t nblocks; 834 char *sendblocks; 835 836 PetscFunctionBegin; 837 #if defined(PETSC_USE_DEBUG) 838 { /* make sure all processors are either in INSERTMODE or ADDMODE */ 839 InsertMode addv; 840 ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 841 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 842 } 843 #endif 844 845 if (stash->subset_off_proc && !mat->subsetoffprocentries) { /* We won't use the old scatter context. */ 846 ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr); 847 } 848 849 ierr = MatStashBlockTypeSetUp(stash);CHKERRQ(ierr); 850 ierr = MatStashSortCompress_Private(stash,mat->insertmode);CHKERRQ(ierr); 851 ierr = PetscSegBufferGetSize(stash->segsendblocks,&nblocks);CHKERRQ(ierr); 852 ierr = PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);CHKERRQ(ierr); 853 if (stash->subset_off_proc && mat->subsetoffprocentries) { /* Set up sendhdrs and sendframes for each rank that we sent before */ 854 PetscInt i; 855 size_t b; 856 for (i=0,b=0; i<stash->nsendranks; i++) { 857 stash->sendframes[i].buffer = &sendblocks[b*stash->blocktype_size]; 858 /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */ 859 stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */ 860 for ( ; b<nblocks; b++) { 861 MatStashBlock *sendblock_b = (MatStashBlock*)&sendblocks[b*stash->blocktype_size]; 862 if (PetscUnlikely(sendblock_b->row < owners[stash->sendranks[i]])) SETERRQ2(stash->comm,PETSC_ERR_ARG_WRONG,"MAT_SUBSET_OFF_PROC_ENTRIES set, but row %D owned by %d not communicated in initial assembly",sendblock_b->row,stash->sendranks[i]); 863 if (sendblock_b->row >= owners[stash->sendranks[i]+1]) break; 864 stash->sendhdr[i].count++; 865 } 866 } 867 } else { /* Dynamically count and pack (first time) */ 868 PetscInt sendno; 869 size_t i,rowstart; 870 871 /* Count number of send ranks and allocate for sends */ 872 stash->nsendranks = 0; 873 for (rowstart=0; rowstart<nblocks; ) { 874 PetscInt owner; 875 MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size]; 876 ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr); 877 if (owner < 0) owner = -(owner+2); 878 for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */ 879 MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size]; 880 if (sendblock_i->row >= owners[owner+1]) break; 881 } 882 stash->nsendranks++; 883 rowstart = i; 884 } 885 ierr = PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);CHKERRQ(ierr); 886 887 /* Set up sendhdrs and sendframes */ 888 sendno = 0; 889 for (rowstart=0; rowstart<nblocks; ) { 890 PetscInt owner; 891 MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size]; 892 ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr); 893 if (owner < 0) owner = -(owner+2); 894 stash->sendranks[sendno] = owner; 895 for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */ 896 MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size]; 897 if (sendblock_i->row >= owners[owner+1]) break; 898 } 899 stash->sendframes[sendno].buffer = sendblock_rowstart; 900 stash->sendframes[sendno].pending = 0; 901 stash->sendhdr[sendno].count = i - rowstart; 902 sendno++; 903 rowstart = i; 904 } 905 if (sendno != stash->nsendranks) SETERRQ2(stash->comm,PETSC_ERR_PLIB,"BTS counted %D sendranks, but %D sends",stash->nsendranks,sendno); 906 } 907 908 /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new 909 * message or a dummy entry of some sort. */ 910 if (mat->insertmode == INSERT_VALUES) { 911 size_t i; 912 for (i=0; i<nblocks; i++) { 913 MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size]; 914 sendblock_i->row = -(sendblock_i->row+1); 915 } 916 } 917 918 if (stash->subset_off_proc && mat->subsetoffprocentries) { 919 PetscMPIInt i,tag; 920 ierr = PetscCommGetNewTag(stash->comm,&tag);CHKERRQ(ierr); 921 for (i=0; i<stash->nrecvranks; i++) { 922 ierr = MatStashBTSRecv_Private(stash->comm,&tag,stash->recvranks[i],&stash->recvhdr[i],&stash->recvreqs[i],stash);CHKERRQ(ierr); 923 } 924 for (i=0; i<stash->nsendranks; i++) { 925 ierr = MatStashBTSSend_Private(stash->comm,&tag,i,stash->sendranks[i],&stash->sendhdr[i],&stash->sendreqs[i],stash);CHKERRQ(ierr); 926 } 927 stash->use_status = PETSC_TRUE; /* Use count from message status. */ 928 } else { 929 ierr = PetscCommBuildTwoSidedFReq(stash->comm,1,MPIU_INT,stash->nsendranks,stash->sendranks,(PetscInt*)stash->sendhdr, 930 &stash->nrecvranks,&stash->recvranks,(PetscInt*)&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs, 931 MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);CHKERRQ(ierr); 932 ierr = PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);CHKERRQ(ierr); 933 stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */ 934 } 935 936 ierr = PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);CHKERRQ(ierr); 937 stash->recvframe_active = NULL; 938 stash->recvframe_i = 0; 939 stash->some_i = 0; 940 stash->some_count = 0; 941 stash->recvcount = 0; 942 stash->subset_off_proc = mat->subsetoffprocentries; 943 stash->insertmode = &mat->insertmode; 944 PetscFunctionReturn(0); 945 } 946 947 static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg) 948 { 949 PetscErrorCode ierr; 950 MatStashBlock *block; 951 952 PetscFunctionBegin; 953 *flg = 0; 954 while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) { 955 if (stash->some_i == stash->some_count) { 956 if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */ 957 ierr = MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);CHKERRQ(ierr); 958 stash->some_i = 0; 959 } 960 stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]]; 961 stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */ 962 if (stash->use_status) { /* Count what was actually sent */ 963 ierr = MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);CHKERRQ(ierr); 964 } 965 if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */ 966 block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[0]; 967 if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES; 968 if (PetscUnlikely(*stash->insertmode == INSERT_VALUES && block->row >= 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling INSERT_VALUES, but rank %d requested ADD_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]); 969 if (PetscUnlikely(*stash->insertmode == ADD_VALUES && block->row < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling ADD_VALUES, but rank %d requested INSERT_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]); 970 } 971 stash->some_i++; 972 stash->recvcount++; 973 stash->recvframe_i = 0; 974 } 975 *n = 1; 976 block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size]; 977 if (block->row < 0) block->row = -(block->row + 1); 978 *row = &block->row; 979 *col = &block->col; 980 *val = block->vals; 981 stash->recvframe_i++; 982 *flg = 1; 983 PetscFunctionReturn(0); 984 } 985 986 static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash) 987 { 988 PetscErrorCode ierr; 989 990 PetscFunctionBegin; 991 ierr = MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 992 if (stash->subset_off_proc) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks */ 993 void *dummy; 994 ierr = PetscSegBufferExtractInPlace(stash->segrecvblocks,&dummy);CHKERRQ(ierr); 995 } else { /* No reuse, so collect everything. */ 996 ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr); 997 } 998 999 /* Now update nmaxold to be app 10% more than max n used, this way the 1000 wastage of space is reduced the next time this stash is used. 1001 Also update the oldmax, only if it increases */ 1002 if (stash->n) { 1003 PetscInt bs2 = stash->bs*stash->bs; 1004 PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 1005 if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 1006 } 1007 1008 stash->nmax = 0; 1009 stash->n = 0; 1010 stash->reallocs = -1; 1011 stash->nprocessed = 0; 1012 1013 ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 1014 1015 stash->space = 0; 1016 1017 PetscFunctionReturn(0); 1018 } 1019 1020 static PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash) 1021 { 1022 PetscErrorCode ierr; 1023 1024 PetscFunctionBegin; 1025 ierr = PetscSegBufferDestroy(&stash->segsendblocks);CHKERRQ(ierr); 1026 ierr = PetscSegBufferDestroy(&stash->segrecvframe);CHKERRQ(ierr); 1027 stash->recvframes = NULL; 1028 ierr = PetscSegBufferDestroy(&stash->segrecvblocks);CHKERRQ(ierr); 1029 if (stash->blocktype != MPI_DATATYPE_NULL) { 1030 ierr = MPI_Type_free(&stash->blocktype);CHKERRQ(ierr); 1031 } 1032 stash->nsendranks = 0; 1033 stash->nrecvranks = 0; 1034 ierr = PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);CHKERRQ(ierr); 1035 ierr = PetscFree(stash->sendreqs);CHKERRQ(ierr); 1036 ierr = PetscFree(stash->recvreqs);CHKERRQ(ierr); 1037 ierr = PetscFree(stash->recvranks);CHKERRQ(ierr); 1038 ierr = PetscFree(stash->recvhdr);CHKERRQ(ierr); 1039 ierr = PetscFree2(stash->some_indices,stash->some_statuses);CHKERRQ(ierr); 1040 PetscFunctionReturn(0); 1041 } 1042 #endif 1043