1 2 #include <petsc-private/matimpl.h> 3 4 #define DEFAULT_STASH_SIZE 10000 5 6 static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*); 7 static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); 8 static PetscErrorCode MatStashScatterEnd_Ref(MatStash*); 9 static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*); 10 static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); 11 static PetscErrorCode MatStashScatterEnd_BTS(MatStash*); 12 static PetscErrorCode MatStashScatterDestroy_BTS(MatStash*); 13 14 /* 15 MatStashCreate_Private - Creates a stash,currently used for all the parallel 16 matrix implementations. The stash is where elements of a matrix destined 17 to be stored on other processors are kept until matrix assembly is done. 18 19 This is a simple minded stash. Simply adds entries to end of stash. 20 21 Input Parameters: 22 comm - communicator, required for scatters. 23 bs - stash block size. 
used when stashing blocks of values 24 25 Output Parameters: 26 stash - the newly created stash 27 */ 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatStashCreate_Private" 30 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash) 31 { 32 PetscErrorCode ierr; 33 PetscInt max,*opt,nopt,i; 34 PetscBool flg; 35 36 PetscFunctionBegin; 37 /* Require 2 tags,get the second using PetscCommGetNewTag() */ 38 stash->comm = comm; 39 40 ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr); 41 ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); 42 ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); 43 ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); 44 ierr = PetscMalloc1(2*stash->size,&stash->flg_v);CHKERRQ(ierr); 45 for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 46 47 48 nopt = stash->size; 49 ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr); 50 ierr = PetscOptionsGetIntArray(NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 51 if (flg) { 52 if (nopt == 1) max = opt[0]; 53 else if (nopt == stash->size) max = opt[stash->rank]; 54 else if (stash->rank < nopt) max = opt[stash->rank]; 55 else max = 0; /* Use default */ 56 stash->umax = max; 57 } else { 58 stash->umax = 0; 59 } 60 ierr = PetscFree(opt);CHKERRQ(ierr); 61 if (bs <= 0) bs = 1; 62 63 stash->bs = bs; 64 stash->nmax = 0; 65 stash->oldnmax = 0; 66 stash->n = 0; 67 stash->reallocs = -1; 68 stash->space_head = 0; 69 stash->space = 0; 70 71 stash->send_waits = 0; 72 stash->recv_waits = 0; 73 stash->send_status = 0; 74 stash->nsends = 0; 75 stash->nrecvs = 0; 76 stash->svalues = 0; 77 stash->rvalues = 0; 78 stash->rindices = 0; 79 stash->nprocessed = 0; 80 stash->reproduce = PETSC_FALSE; 81 stash->blocktype = MPI_DATATYPE_NULL; 82 83 ierr = PetscOptionsGetBool(NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr); 84 ierr = PetscOptionsGetBool(NULL,"-matstash_bts",&flg,NULL);CHKERRQ(ierr); 85 if (flg) { 86 stash->ScatterBegin = 
MatStashScatterBegin_BTS; 87 stash->ScatterGetMesg = MatStashScatterGetMesg_BTS; 88 stash->ScatterEnd = MatStashScatterEnd_BTS; 89 stash->ScatterDestroy = MatStashScatterDestroy_BTS; 90 } else { 91 stash->ScatterBegin = MatStashScatterBegin_Ref; 92 stash->ScatterGetMesg = MatStashScatterGetMesg_Ref; 93 stash->ScatterEnd = MatStashScatterEnd_Ref; 94 stash->ScatterDestroy = NULL; 95 } 96 PetscFunctionReturn(0); 97 } 98 99 /* 100 MatStashDestroy_Private - Destroy the stash 101 */ 102 #undef __FUNCT__ 103 #define __FUNCT__ "MatStashDestroy_Private" 104 PetscErrorCode MatStashDestroy_Private(MatStash *stash) 105 { 106 PetscErrorCode ierr; 107 108 PetscFunctionBegin; 109 ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 110 if (stash->ScatterDestroy) {ierr = (*stash->ScatterDestroy)(stash);CHKERRQ(ierr);} 111 112 stash->space = 0; 113 114 ierr = PetscFree(stash->flg_v);CHKERRQ(ierr); 115 PetscFunctionReturn(0); 116 } 117 118 /* 119 MatStashScatterEnd_Private - This is called as the final stage of 120 scatter. The final stages of message passing is done here, and 121 all the memory used for message passing is cleaned up. This 122 routine also resets the stash, and deallocates the memory used 123 for the stash. It also keeps track of the current memory usage 124 so that the same value can be used the next time through. 
*/
#undef __FUNCT__
#define __FUNCT__ "MatStashScatterEnd_Private"
PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatch to the Ref or BTS implementation selected at creation time */
  ierr = (*stash->ScatterEnd)(stash);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatStashScatterEnd_Ref"
static PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
{
  PetscErrorCode ierr;
  PetscInt       nsends=stash->nsends,bs2,oldnmax,i;
  MPI_Status     *send_status;

  PetscFunctionBegin;
  /* Reset the received-message bookkeeping used by MatStashScatterGetMesg_Ref() */
  for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
  /* wait on sends (two sends per destination rank: indices and values) */
  if (nsends) {
    ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }

  /* Now update nmaxold to be app 10% more than max n used, this way the
     wastage of space is reduced the next time this stash is used.
     Also update the oldmax, only if it increases */
  if (stash->n) {
    bs2     = stash->bs*stash->bs;
    oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }

  stash->nmax       = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->nprocessed = 0;

  ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);

  stash->space = 0;

  /* Release communication buffers allocated by MatStashScatterBegin_Ref() */
  ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
  ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
  ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr);
  ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr);
  ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
  ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr);
  ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatStashGetInfo_Private - Gets the relevant statistics of the stash

   Input Parameters:
   stash    - the stash
   nstash   - the size of the stash. Indicates the number of values stored.
   reallocs - the number of additional mallocs incurred.

*/
#undef __FUNCT__
#define __FUNCT__ "MatStashGetInfo_Private"
PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
{
  PetscInt bs2 = stash->bs*stash->bs;

  PetscFunctionBegin;
  if (nstash) *nstash = stash->n*bs2;
  if (reallocs) {
    /* reallocs starts at -1; report 0 until the first reallocation happens */
    if (stash->reallocs < 0) *reallocs = 0;
    else *reallocs = stash->reallocs;
  }
  PetscFunctionReturn(0);
}

/*
   MatStashSetInitialSize_Private - Sets the initial size of the stash

   Input Parameters:
   stash - the stash
   max   - the value that is used as the max size of the stash.
           this value is used while allocating memory.
213 */ 214 #undef __FUNCT__ 215 #define __FUNCT__ "MatStashSetInitialSize_Private" 216 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max) 217 { 218 PetscFunctionBegin; 219 stash->umax = max; 220 PetscFunctionReturn(0); 221 } 222 223 /* MatStashExpand_Private - Expand the stash. This function is called 224 when the space in the stash is not sufficient to add the new values 225 being inserted into the stash. 226 227 Input Parameters: 228 stash - the stash 229 incr - the minimum increase requested 230 231 Notes: 232 This routine doubles the currently used memory. 233 */ 234 #undef __FUNCT__ 235 #define __FUNCT__ "MatStashExpand_Private" 236 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr) 237 { 238 PetscErrorCode ierr; 239 PetscInt newnmax,bs2= stash->bs*stash->bs; 240 241 PetscFunctionBegin; 242 /* allocate a larger stash */ 243 if (!stash->oldnmax && !stash->nmax) { /* new stash */ 244 if (stash->umax) newnmax = stash->umax/bs2; 245 else newnmax = DEFAULT_STASH_SIZE/bs2; 246 } else if (!stash->nmax) { /* resuing stash */ 247 if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 248 else newnmax = stash->oldnmax/bs2; 249 } else newnmax = stash->nmax*2; 250 if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 251 252 /* Get a MatStashSpace and attach it to stash */ 253 ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr); 254 if (!stash->space_head) { /* new stash or resuing stash->oldnmax */ 255 stash->space_head = stash->space; 256 } 257 258 stash->reallocs++; 259 stash->nmax = newnmax; 260 PetscFunctionReturn(0); 261 } 262 /* 263 MatStashValuesRow_Private - inserts values into the stash. This function 264 expects the values to be roworiented. Multiple columns belong to the same row 265 can be inserted with a single call to this function. 266 267 Input Parameters: 268 stash - the stash 269 row - the global row correspoiding to the values 270 n - the number of elements inserted. 
All elements belong to the above row. 271 idxn - the global column indices corresponding to each of the values. 272 values - the values inserted 273 */ 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatStashValuesRow_Private" 276 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries) 277 { 278 PetscErrorCode ierr; 279 PetscInt i,k,cnt = 0; 280 PetscMatStashSpace space=stash->space; 281 282 PetscFunctionBegin; 283 /* Check and see if we have sufficient memory */ 284 if (!space || space->local_remaining < n) { 285 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 286 } 287 space = stash->space; 288 k = space->local_used; 289 for (i=0; i<n; i++) { 290 if (ignorezeroentries && (values[i] == 0.0)) continue; 291 space->idx[k] = row; 292 space->idy[k] = idxn[i]; 293 space->val[k] = values[i]; 294 k++; 295 cnt++; 296 } 297 stash->n += cnt; 298 space->local_used += cnt; 299 space->local_remaining -= cnt; 300 PetscFunctionReturn(0); 301 } 302 303 /* 304 MatStashValuesCol_Private - inserts values into the stash. This function 305 expects the values to be columnoriented. Multiple columns belong to the same row 306 can be inserted with a single call to this function. 307 308 Input Parameters: 309 stash - the stash 310 row - the global row correspoiding to the values 311 n - the number of elements inserted. All elements belong to the above row. 312 idxn - the global column indices corresponding to each of the values. 313 values - the values inserted 314 stepval - the consecutive values are sepated by a distance of stepval. 315 this happens because the input is columnoriented. 
316 */ 317 #undef __FUNCT__ 318 #define __FUNCT__ "MatStashValuesCol_Private" 319 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries) 320 { 321 PetscErrorCode ierr; 322 PetscInt i,k,cnt = 0; 323 PetscMatStashSpace space=stash->space; 324 325 PetscFunctionBegin; 326 /* Check and see if we have sufficient memory */ 327 if (!space || space->local_remaining < n) { 328 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 329 } 330 space = stash->space; 331 k = space->local_used; 332 for (i=0; i<n; i++) { 333 if (ignorezeroentries && (values[i*stepval] == 0.0)) continue; 334 space->idx[k] = row; 335 space->idy[k] = idxn[i]; 336 space->val[k] = values[i*stepval]; 337 k++; 338 cnt++; 339 } 340 stash->n += cnt; 341 space->local_used += cnt; 342 space->local_remaining -= cnt; 343 PetscFunctionReturn(0); 344 } 345 346 /* 347 MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 348 This function expects the values to be roworiented. Multiple columns belong 349 to the same block-row can be inserted with a single call to this function. 350 This function extracts the sub-block of values based on the dimensions of 351 the original input block, and the row,col values corresponding to the blocks. 352 353 Input Parameters: 354 stash - the stash 355 row - the global block-row correspoiding to the values 356 n - the number of elements inserted. All elements belong to the above row. 357 idxn - the global block-column indices corresponding to each of the blocks of 358 values. Each block is of size bs*bs. 359 values - the values inserted 360 rmax - the number of block-rows in the original block. 361 cmax - the number of block-columsn on the original block. 362 idx - the index of the current block-row in the original block. 
363 */ 364 #undef __FUNCT__ 365 #define __FUNCT__ "MatStashValuesRowBlocked_Private" 366 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 367 { 368 PetscErrorCode ierr; 369 PetscInt i,j,k,bs2,bs=stash->bs,l; 370 const PetscScalar *vals; 371 PetscScalar *array; 372 PetscMatStashSpace space=stash->space; 373 374 PetscFunctionBegin; 375 if (!space || space->local_remaining < n) { 376 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 377 } 378 space = stash->space; 379 l = space->local_used; 380 bs2 = bs*bs; 381 for (i=0; i<n; i++) { 382 space->idx[l] = row; 383 space->idy[l] = idxn[i]; 384 /* Now copy over the block of values. Store the values column oriented. 385 This enables inserting multiple blocks belonging to a row with a single 386 funtion call */ 387 array = space->val + bs2*l; 388 vals = values + idx*bs2*n + bs*i; 389 for (j=0; j<bs; j++) { 390 for (k=0; k<bs; k++) array[k*bs] = vals[k]; 391 array++; 392 vals += cmax*bs; 393 } 394 l++; 395 } 396 stash->n += n; 397 space->local_used += n; 398 space->local_remaining -= n; 399 PetscFunctionReturn(0); 400 } 401 402 /* 403 MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 404 This function expects the values to be roworiented. Multiple columns belong 405 to the same block-row can be inserted with a single call to this function. 406 This function extracts the sub-block of values based on the dimensions of 407 the original input block, and the row,col values corresponding to the blocks. 408 409 Input Parameters: 410 stash - the stash 411 row - the global block-row correspoiding to the values 412 n - the number of elements inserted. All elements belong to the above row. 413 idxn - the global block-column indices corresponding to each of the blocks of 414 values. Each block is of size bs*bs. 
415 values - the values inserted 416 rmax - the number of block-rows in the original block. 417 cmax - the number of block-columsn on the original block. 418 idx - the index of the current block-row in the original block. 419 */ 420 #undef __FUNCT__ 421 #define __FUNCT__ "MatStashValuesColBlocked_Private" 422 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 423 { 424 PetscErrorCode ierr; 425 PetscInt i,j,k,bs2,bs=stash->bs,l; 426 const PetscScalar *vals; 427 PetscScalar *array; 428 PetscMatStashSpace space=stash->space; 429 430 PetscFunctionBegin; 431 if (!space || space->local_remaining < n) { 432 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 433 } 434 space = stash->space; 435 l = space->local_used; 436 bs2 = bs*bs; 437 for (i=0; i<n; i++) { 438 space->idx[l] = row; 439 space->idy[l] = idxn[i]; 440 /* Now copy over the block of values. Store the values column oriented. 441 This enables inserting multiple blocks belonging to a row with a single 442 funtion call */ 443 array = space->val + bs2*l; 444 vals = values + idx*bs2*n + bs*i; 445 for (j=0; j<bs; j++) { 446 for (k=0; k<bs; k++) array[k] = vals[k]; 447 array += bs; 448 vals += rmax*bs; 449 } 450 l++; 451 } 452 stash->n += n; 453 space->local_used += n; 454 space->local_remaining -= n; 455 PetscFunctionReturn(0); 456 } 457 /* 458 MatStashScatterBegin_Private - Initiates the transfer of values to the 459 correct owners. This function goes through the stash, and check the 460 owners of each stashed value, and sends the values off to the owner 461 processors. 462 463 Input Parameters: 464 stash - the stash 465 owners - an array of size 'no-of-procs' which gives the ownership range 466 for each node. 467 468 Notes: The 'owners' array in the cased of the blocked-stash has the 469 ranges specified blocked global indices, and for the regular stash in 470 the proper global indices. 
*/
#undef __FUNCT__
#define __FUNCT__ "MatStashScatterBegin_Private"
PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatch to the Ref or BTS implementation selected at creation time */
  ierr = (*stash->ScatterBegin)(mat,stash,owners);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatStashScatterBegin_Ref"
static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners)
{
  PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
  PetscInt           size=stash->size,nsends;
  PetscErrorCode     ierr;
  PetscInt           count,*sindices,**rindices,i,j,idx,lastidx,l;
  PetscScalar        **rvalues,*svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
  PetscMPIInt        *sizes,*nlengths,nreceives;
  PetscInt           *sp_idx,*sp_idy;
  PetscScalar        *sp_val;
  PetscMatStashSpace space,space_next;

  PetscFunctionBegin;
  bs2 = stash->bs*stash->bs;

  /* first count number of contributors to each processor */
  ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
  ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
  ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);

  i       = j = 0;
  lastidx = -1;
  space   = stash->space_head;
  while (space != NULL) {
    space_next = space->next;
    sp_idx     = space->idx;
    for (l=0; l<space->local_used; l++) {
      /* if indices are NOT locally sorted, need to start search at the beginning */
      if (lastidx > (idx = sp_idx[l])) j = 0;
      lastidx = idx;
      for (; j<size; j++) {
        /* owners[j]..owners[j+1] is the (block) row range owned by rank j */
        if (idx >= owners[j] && idx < owners[j+1]) {
          nlengths[j]++; owner[i] = j; break;
        }
      }
      i++;
    }
    space = space_next;
  }

  /* Now check what procs get messages - and compute nsends. */
  for (i=0, nsends=0; i<size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1; nsends++;
    }
  }

  {PetscMPIInt *onodes,*olengths;
   /* Determine the number of messages to expect, their lengths, from from-ids */
   ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
   /* since clubbing row,col - lengths are multiplied by 2 */
   for (i=0; i<nreceives; i++) olengths[i] *=2;
   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
   /* values are size 'bs2' lengths (and remove earlier factor 2 */
   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
   ierr = PetscFree(onodes);CHKERRQ(ierr);
   ierr = PetscFree(olengths);CHKERRQ(ierr);}

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
  ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
  /* use 2 sends the first with all_a, the next with all_i and all_j */
  startv[0] = 0; starti[0] = 0;
  for (i=1; i<size; i++) {
    startv[i] = startv[i-1] + nlengths[i-1];
    starti[i] = starti[i-1] + 2*nlengths[i-1];
  }

  /* pack the stashed entries into the send buffers, per destination rank */
  i     = 0;
  space = stash->space_head;
  while (space != NULL) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l=0; l<space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt    k;
        PetscScalar *buf1,*buf2;
        buf1 = svalues+bs2*startv[j];
        buf2 = space->val + bs2*l;
        for (k=0; k<bs2; k++) buf1[k] = buf2[k];
      }
      /* row indices in the first half of the slot range, col indices in the second */
      sindices[starti[j]]             = sp_idx[l];
      sindices[starti[j]+nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  /* startv was bumped while packing; rebuild it before posting the sends */
  startv[0] = 0;
  for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];

  for (i=0,count=0; i<size; i++) {
    if (sizes[i]) {
      ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
      ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
#if defined(PETSC_USE_INFO)
  ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
  for (i=0; i<size; i++) {
    if (sizes[i]) {
      ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
    }
  }
#endif
  ierr = PetscFree(nlengths);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);

  for (i=0; i<nreceives; i++) {
    recv_waits[2*i]   = recv_waits1[i];
    recv_waits[2*i+1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
  ierr = PetscFree(recv_waits2);CHKERRQ(ierr);

  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(0);
}

/*
   MatStashScatterGetMesg_Private - This function waits on the receives posted
   in the function MatStashScatterBegin_Private() and returns one message at
   a time to the calling function.
   If no messages are left, it indicates this
   by setting flg = 0, else it sets flg = 1.

   Input Parameters:
   stash - the stash

   Output Parameters:
   nvals - the number of entries in the current message.
   rows  - an array of row indices (or blocked indices) corresponding to the values
   cols  - an array of columnindices (or blocked indices) corresponding to the values
   vals  - the values
   flg   - 0 indicates no more message left, and the current call has no values associated.
           1 indicates that the current call successfully received a message, and the
           other output parameters nvals,rows,cols,vals are set appropriately.
*/
#undef __FUNCT__
#define __FUNCT__ "MatStashScatterGetMesg_Private"
PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatch to the Ref or BTS implementation selected at creation time */
  ierr = (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatStashScatterGetMesg_Ref"
static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
{
  PetscErrorCode ierr;
  PetscMPIInt    i,*flg_v = stash->flg_v,i1,i2;
  PetscInt       bs2;
  MPI_Status     recv_status;
  PetscBool      match_found = PETSC_FALSE;

  PetscFunctionBegin;
  *flg = 0; /* When a message is discovered this is reset to 1 */
  /* Return if no more messages to process */
  if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0);

  bs2 = stash->bs*stash->bs;
  /* If a matching pair of receives are found, process them, and return the data to
     the calling function. Until then keep receiving messages */
  while (!match_found) {
    if (stash->reproduce) {
      /* reproduce mode: complete receives in posted order for deterministic assembly */
      i    = stash->reproduce_count++;
      ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr);
    } else {
      ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
    }
    if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");

    /* Now pack the received message into a structure which is usable by others */
    if (i % 2) {
      /* odd request index: the values message (interleaved second in recv_waits) */
      ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);

      flg_v[2*recv_status.MPI_SOURCE] = i/2;

      *nvals = *nvals/bs2;
    } else {
      /* even request index: the indices message */
      ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);

      flg_v[2*recv_status.MPI_SOURCE+1] = i/2;

      *nvals = *nvals/2; /* This message has both row indices and col indices */
    }

    /* Check if we have both messages from this proc */
    i1 = flg_v[2*recv_status.MPI_SOURCE];
    i2 = flg_v[2*recv_status.MPI_SOURCE+1];
    if (i1 != -1 && i2 != -1) {
      *rows = stash->rindices[i2];
      *cols = *rows + *nvals; /* col indices are stored right after the row indices */
      *vals = stash->rvalues[i1];
      *flg  = 1;
      stash->nprocessed++;
      match_found = PETSC_TRUE;
    }
  }
  PetscFunctionReturn(0);
}

/* One (row,col) block as packed into the BTS send/receive buffers */
typedef struct {
  PetscInt    row;
  PetscInt    col;
  PetscScalar vals[1]; /* Actually an array of length bs2 */
} MatStashBlock;

#undef __FUNCT__
#define __FUNCT__ "MatStashSortCompress_Private"
/* Sort the stashed entries by (row,col), combine duplicate locations
   (summing for ADD_VALUES, last-one-wins otherwise), and pack them into
   stash->segsendblocks */
static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode)
{
  PetscErrorCode     ierr;
  PetscMatStashSpace space;
  PetscInt           n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i;
  PetscScalar        **valptr;

  PetscFunctionBegin;
  ierr = PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);CHKERRQ(ierr);
  /* Flatten the chunked stash into parallel row/col/value-pointer arrays */
  for (space=stash->space_head,cnt=0; space; space=space->next) {
    for (i=0; i<space->local_used; i++) {
      row[cnt]    = space->idx[i];
      col[cnt]    = space->idy[i];
      valptr[cnt] = &space->val[i*bs2];
      perm[cnt]   = cnt; /* Will tell us where to find valptr after sorting row[] and col[] */
      cnt++;
    }
  }
  if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt);
  ierr = PetscSortIntWithArrayPair(n,row,col,perm);CHKERRQ(ierr);
  /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
  for (rowstart=0,cnt=0,i=1; i<=n; i++) {
    if (i == n || row[i] != row[rowstart]) { /* Sort the last row. */
      PetscInt colstart;
      ierr = PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);CHKERRQ(ierr);
      for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */
        PetscInt      j,l;
        MatStashBlock *block;
        ierr = PetscSegBufferGet(stash->segsendblocks,1,&block);CHKERRQ(ierr);
        block->row = row[rowstart];
        block->col = col[colstart];
        ierr = PetscMemcpy(block->vals,valptr[perm[colstart]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
        for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
          if (insertmode == ADD_VALUES) {
            for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l];
          } else {
            /* non-additive mode: the last inserted block wins */
            ierr = PetscMemcpy(block->vals,valptr[perm[j]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
          }
        }
        colstart = j;
      }
      rowstart = i;
    }
  }
  ierr = PetscFree4(row,col,valptr,perm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatStashBlockTypeSetUp"
/* Lazily build the MPI datatype describing one MatStashBlock (two PetscInts
   followed by bs2 PetscScalars, padded up to PetscInt alignment), plus the
   segmented buffers used to stage outgoing and incoming blocks */
static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (stash->blocktype == MPI_DATATYPE_NULL) {
    PetscInt     bs2 = PetscSqr(stash->bs);
    PetscMPIInt  blocklens[2];
    MPI_Aint     displs[2];
    MPI_Datatype types[2],stype;

    stash->blocktype_size = offsetof(MatStashBlock,vals) + bs2*sizeof(PetscScalar);
    if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
      stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
    }
    ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);CHKERRQ(ierr);
    ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);CHKERRQ(ierr);
    ierr = PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);CHKERRQ(ierr);
    blocklens[0] = 2;
    blocklens[1] = bs2;
    displs[0]    = offsetof(MatStashBlock,row);
    displs[1]    = offsetof(MatStashBlock,vals);
    types[0]     = MPIU_INT;
    types[1]     = MPIU_SCALAR;
    ierr = MPI_Type_create_struct(2,blocklens,displs,types,&stype);CHKERRQ(ierr);
    ierr = MPI_Type_commit(&stype);CHKERRQ(ierr);
    /* resize so consecutive blocks in a buffer have the padded extent */
    ierr = MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype);CHKERRQ(ierr); /* MPI-2 */
    ierr = MPI_Type_commit(&stash->blocktype);CHKERRQ(ierr);
    ierr = MPI_Type_free(&stype);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatStashBTSSend_Private"
/* Callback invoked after target rank has initiated receive of rendezvous message.
 * Here we post the main sends.
 */
static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx)
{
  MatStash       *stash = (MatStash*)ctx;
  MatStashHeader *hdr   = (MatStashHeader*)sdata;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (rank != stash->sendranks[rankid]) SETERRQ3(comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]);
  /* Send the run of blocks destined for 'rank'; the count was advertised in the rendezvous header */
  ierr = MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
  stash->sendframes[rankid].count   = hdr->count;
  stash->sendframes[rankid].pending = 1;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatStashBTSRecv_Private"
/* Callback invoked by target after receiving rendezvous message.
 * Here we post the main recvs.
 */
static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx)
{
  MatStash       *stash = (MatStash*)ctx;
  MatStashHeader *hdr   = (MatStashHeader*)rdata;
  MatStashFrame  *frame;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Reserve a frame and a receive buffer sized from the rendezvous header */
  ierr = PetscSegBufferGet(stash->segrecvframe,1,&frame);CHKERRQ(ierr);
  ierr = PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);CHKERRQ(ierr);
  ierr = MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
  frame->count   = hdr->count;
  frame->pending = 1;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatStashScatterBegin_BTS"
/*
 * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
 */
static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[])
{
  PetscErrorCode ierr;
  size_t         nblocks;
  char           *sendblocks;

  PetscFunctionBegin;
  ierr =
MatStashBlockTypeSetUp(stash);CHKERRQ(ierr); 861 ierr = MatStashSortCompress_Private(stash,mat->insertmode);CHKERRQ(ierr); 862 ierr = PetscSegBufferGetSize(stash->segsendblocks,&nblocks);CHKERRQ(ierr); 863 ierr = PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);CHKERRQ(ierr); 864 { 865 PetscInt i,rowstart,sendno; 866 867 /* Count number of send ranks and allocate for sends */ 868 stash->nsendranks = 0; 869 for (rowstart=0; rowstart<nblocks; ) { 870 PetscInt lastowner,owner; 871 MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size]; 872 ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr); 873 if (owner < 0) owner = -(owner+2); 874 lastowner = owner; 875 for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */ 876 MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size]; 877 if (sendblock_i->row == sendblock_rowstart->row) continue; 878 ierr = PetscFindInt(sendblock_i->row,stash->size+1,owners,&owner);CHKERRQ(ierr); 879 if (owner < 0) owner = -(owner+2); 880 if (owner != lastowner) break; 881 } 882 stash->nsendranks++; 883 rowstart = i; 884 } 885 ierr = PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);CHKERRQ(ierr); 886 887 /* Set up sendhdrs and sendframes */ 888 sendno = 0; 889 for (rowstart=0; rowstart<nblocks; ) { 890 PetscInt owner; 891 MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size]; 892 ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr); 893 if (owner < 0) owner = -(owner+2); 894 stash->sendranks[sendno] = owner; 895 for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */ 896 MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size]; 897 if (sendblock_i->row == 
sendblock_rowstart->row) continue; 898 ierr = PetscFindInt(sendblock_i->row,stash->size+1,owners,&owner);CHKERRQ(ierr); 899 if (owner < 0) owner = -(owner+2); 900 if (owner != stash->sendranks[sendno]) break; 901 } 902 stash->sendframes[sendno].buffer = sendblock_rowstart; 903 stash->sendframes[sendno].pending = 0; 904 stash->sendhdr[sendno].count = i - rowstart; 905 stash->sendhdr[sendno].insertmode = mat->insertmode; 906 sendno++; 907 rowstart = i; 908 } 909 if (sendno != stash->nsendranks) SETERRQ2(stash->comm,PETSC_ERR_PLIB,"BTS counted %D sendranks, but %D sends",stash->nsendranks,sendno); 910 } 911 912 ierr = PetscCommBuildTwoSidedFReq(stash->comm,2,MPIU_INT,stash->nsendranks,stash->sendranks,stash->sendhdr, 913 &stash->nrecvranks,&stash->recvranks,&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs, 914 MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);CHKERRQ(ierr); 915 916 ierr = PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);CHKERRQ(ierr); 917 ierr = PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);CHKERRQ(ierr); 918 stash->recvframe_active = NULL; 919 stash->recvframe_i = 0; 920 stash->some_i = 0; 921 stash->some_count = 0; 922 stash->recvcount = 0; 923 stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. 
*/ 924 925 PetscFunctionReturn(0); 926 } 927 928 #undef __FUNCT__ 929 #define __FUNCT__ "MatStashScatterGetMesg_BTS" 930 static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg) 931 { 932 PetscErrorCode ierr; 933 MatStashBlock *block; 934 935 PetscFunctionBegin; 936 *flg = 0; 937 while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) { 938 if (stash->some_i == stash->some_count) { 939 if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */ 940 ierr = MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);CHKERRQ(ierr); 941 stash->some_i = 0; 942 } 943 stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]]; 944 stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */ 945 if (stash->use_status) { /* Count what was actually sent */ 946 ierr = MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);CHKERRQ(ierr); 947 } 948 stash->some_i++; 949 stash->recvcount++; 950 stash->recvframe_i = 0; 951 } 952 *n = 1; 953 block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size]; 954 *row = &block->row; 955 *col = &block->col; 956 *val = block->vals; 957 stash->recvframe_i++; 958 *flg = 1; 959 PetscFunctionReturn(0); 960 } 961 962 #undef __FUNCT__ 963 #define __FUNCT__ "MatStashScatterEnd_BTS" 964 static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash) 965 { 966 PetscErrorCode ierr; 967 968 PetscFunctionBegin; 969 ierr = MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 970 ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr); 971 972 /* Now update nmaxold to be app 10% more than max n used, this way the 973 wastage of space is reduced the next time this stash is used. 
974 Also update the oldmax, only if it increases */ 975 if (stash->n) { 976 PetscInt bs2 = stash->bs*stash->bs; 977 PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 978 if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 979 } 980 981 stash->nmax = 0; 982 stash->n = 0; 983 stash->reallocs = -1; 984 stash->nprocessed = 0; 985 986 ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 987 988 stash->space = 0; 989 990 PetscFunctionReturn(0); 991 } 992 993 #undef __FUNCT__ 994 #define __FUNCT__ "MatStashScatterDestroy_BTS" 995 static PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash) 996 { 997 PetscErrorCode ierr; 998 999 PetscFunctionBegin; 1000 ierr = PetscSegBufferDestroy(&stash->segsendblocks);CHKERRQ(ierr); 1001 ierr = PetscSegBufferDestroy(&stash->segrecvframe);CHKERRQ(ierr); 1002 stash->recvframes = NULL; 1003 ierr = PetscSegBufferDestroy(&stash->segrecvblocks);CHKERRQ(ierr); 1004 if (stash->blocktype != MPI_DATATYPE_NULL) { 1005 ierr = MPI_Type_free(&stash->blocktype);CHKERRQ(ierr); 1006 } 1007 stash->nsendranks = 0; 1008 stash->nrecvranks = 0; 1009 ierr = PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);CHKERRQ(ierr); 1010 ierr = PetscFree(stash->sendreqs);CHKERRQ(ierr); 1011 ierr = PetscFree(stash->recvreqs);CHKERRQ(ierr); 1012 ierr = PetscFree(stash->recvranks);CHKERRQ(ierr); 1013 ierr = PetscFree(stash->recvhdr);CHKERRQ(ierr); 1014 ierr = PetscFree2(stash->some_indices,stash->some_statuses);CHKERRQ(ierr); 1015 PetscFunctionReturn(0); 1016 } 1017