1 #define PETSCMAT_DLL 2 3 #include "src/mat/matimpl.h" 4 #include "src/mat/utils/matstashspace.h" 5 #undef MV 6 /* 7 The input to the stash is ALWAYS in MatScalar precision, and the 8 internal storage and output is also in MatScalar. 9 */ 10 #define DEFAULT_STASH_SIZE 10000 11 12 /* 13 MatStashCreate_Private - Creates a stash,currently used for all the parallel 14 matrix implementations. The stash is where elements of a matrix destined 15 to be stored on other processors are kept until matrix assembly is done. 16 17 This is a simple minded stash. Simply adds entries to end of stash. 18 19 Input Parameters: 20 comm - communicator, required for scatters. 21 bs - stash block size. used when stashing blocks of values 22 23 Output Parameters: 24 stash - the newly created stash 25 */ 26 #undef __FUNCT__ 27 #define __FUNCT__ "MatStashCreate_Private" 28 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash) 29 { 30 PetscErrorCode ierr; 31 PetscInt max,*opt,nopt; 32 PetscTruth flg; 33 34 PetscFunctionBegin; 35 /* Require 2 tags,get the second using PetscCommGetNewTag() */ 36 stash->comm = comm; 37 ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr); 38 ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); 39 ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); 40 ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); 41 42 nopt = stash->size; 43 ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr); 44 ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 45 if (flg) { 46 if (nopt == 1) max = opt[0]; 47 else if (nopt == stash->size) max = opt[stash->rank]; 48 else if (stash->rank < nopt) max = opt[stash->rank]; 49 else max = 0; /* Use default */ 50 stash->umax = max; 51 } else { 52 stash->umax = 0; 53 } 54 ierr = PetscFree(opt);CHKERRQ(ierr); 55 if (bs <= 0) bs = 1; 56 57 stash->bs = bs; 58 stash->nmax = 0; 59 stash->oldnmax = 0; 60 stash->n = 0; 61 stash->reallocs = -1; 62 #ifdef MV 63 stash->idx = 0; 64 stash->idy = 0; 65 stash->array = 0; 66 #endif 67 stash->space_head = 0; 68 stash->space = 0; 69 70 stash->send_waits = 0; 71 stash->recv_waits = 0; 72 stash->send_status = 0; 73 stash->nsends = 0; 74 stash->nrecvs = 0; 75 stash->svalues = 0; 76 stash->rvalues = 0; 77 stash->rindices = 0; 78 stash->nprocs = 0; 79 stash->nprocessed = 0; 80 PetscFunctionReturn(0); 81 } 82 83 /* 84 MatStashDestroy_Private - Destroy the stash 85 */ 86 #undef __FUNCT__ 87 #define __FUNCT__ "MatStashDestroy_Private" 88 PetscErrorCode MatStashDestroy_Private(MatStash *stash) 89 { 90 PetscErrorCode ierr; 91 92 PetscFunctionBegin; 93 #ifdef MV 94 if (stash->array) { 95 ierr = PetscFree(stash->array);CHKERRQ(ierr); 96 stash->array = 0; 97 } 98 #endif 99 if (stash->space_head){ 100 ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr); 101 stash->space_head = 0; 102 } 103 PetscFunctionReturn(0); 104 } 105 106 /* 107 MatStashScatterEnd_Private - This is called as the fial stage of 108 scatter. The final stages of messagepassing is done here, and 109 all the memory used for messagepassing is cleanedu up. This 110 routine also resets the stash, and deallocates the memory used 111 for the stash. It also keeps track of the current memory usage 112 so that the same value can be used the next time through. 113 */ 114 #undef __FUNCT__ 115 #define __FUNCT__ "MatStashScatterEnd_Private" 116 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash) 117 { 118 PetscErrorCode ierr; 119 int nsends=stash->nsends,bs2,oldnmax; 120 MPI_Status *send_status; 121 122 PetscFunctionBegin; 123 /* wait on sends */ 124 if (nsends) { 125 ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 126 ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 127 ierr = PetscFree(send_status);CHKERRQ(ierr); 128 } 129 130 /* Now update nmaxold to be app 10% more than max n used, this way the 131 wastage of space is reduced the next time this stash is used. 132 Also update the oldmax, only if it increases */ 133 if (stash->n) { 134 bs2 = stash->bs*stash->bs; 135 oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 136 if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 137 } 138 139 stash->nmax = 0; 140 stash->n = 0; 141 stash->reallocs = -1; 142 stash->nprocessed = 0; 143 #ifdef MV 144 if (stash->array) { 145 ierr = PetscFree(stash->array);CHKERRQ(ierr); 146 stash->array = 0; 147 stash->idx = 0; 148 stash->idy = 0; 149 } 150 #endif 151 if (stash->space_head){ 152 ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr); 153 stash->space_head = 0; 154 } 155 if (stash->send_waits) { 156 ierr = PetscFree(stash->send_waits);CHKERRQ(ierr); 157 stash->send_waits = 0; 158 } 159 if (stash->recv_waits) { 160 ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr); 161 stash->recv_waits = 0; 162 } 163 if (stash->svalues) { 164 ierr = PetscFree(stash->svalues);CHKERRQ(ierr); 165 stash->svalues = 0; 166 } 167 if (stash->rvalues) { 168 ierr = PetscFree(stash->rvalues);CHKERRQ(ierr); 169 stash->rvalues = 0; 170 } 171 if (stash->rindices) { 172 ierr = PetscFree(stash->rindices);CHKERRQ(ierr); 173 stash->rindices = 0; 174 } 175 if (stash->nprocs) { 176 ierr = PetscFree(stash->nprocs);CHKERRQ(ierr); 177 stash->nprocs = 0; 178 } 179 180 PetscFunctionReturn(0); 181 } 182 183 /* 184 MatStashGetInfo_Private - Gets the relavant statistics of the stash 185 186 Input Parameters: 187 stash - the stash 188 nstash - the size of the stash. Indicates the number of values stored. 189 reallocs - the number of additional mallocs incurred. 190 191 */ 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatStashGetInfo_Private" 194 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs) 195 { 196 PetscInt bs2 = stash->bs*stash->bs; 197 198 PetscFunctionBegin; 199 if (nstash) *nstash = stash->n*bs2; 200 if (reallocs) { 201 if (stash->reallocs < 0) *reallocs = 0; 202 else *reallocs = stash->reallocs; 203 } 204 PetscFunctionReturn(0); 205 } 206 207 208 /* 209 MatStashSetInitialSize_Private - Sets the initial size of the stash 210 211 Input Parameters: 212 stash - the stash 213 max - the value that is used as the max size of the stash. 214 this value is used while allocating memory. 215 */ 216 #undef __FUNCT__ 217 #define __FUNCT__ "MatStashSetInitialSize_Private" 218 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max) 219 { 220 PetscFunctionBegin; 221 stash->umax = max; 222 PetscFunctionReturn(0); 223 } 224 225 /* MatStashExpand_Private - Expand the stash. This function is called 226 when the space in the stash is not sufficient to add the new values 227 being inserted into the stash. 228 229 Input Parameters: 230 stash - the stash 231 incr - the minimum increase requested 232 233 Notes: 234 This routine doubles the currently used memory. 235 */ 236 #undef __FUNCT__ 237 #define __FUNCT__ "MatStashExpand_Private" 238 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr) 239 { 240 PetscErrorCode ierr; 241 PetscInt *n_idx,*n_idy,newnmax,bs2= stash->bs*stash->bs; 242 MatScalar *n_array; 243 244 PetscFunctionBegin; 245 /* allocate a larger stash */ 246 if (!stash->oldnmax && !stash->nmax) { /* new stash */ 247 if (stash->umax) newnmax = stash->umax/bs2; 248 else newnmax = DEFAULT_STASH_SIZE/bs2; 249 } else if (!stash->nmax) { /* resuing stash */ 250 if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 251 else newnmax = stash->oldnmax/bs2; 252 } else newnmax = stash->nmax*2; 253 if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 254 255 /* Get a MatStashSpace and attach it to stash */ 256 if (!stash->nmax) { /* new stash or resuing stash->oldnmax */ 257 ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space_head);CHKERRQ(ierr); 258 stash->space = stash->space_head; 259 } else { 260 ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr); 261 } 262 #ifdef MV 263 PetscMPIInt rank; 264 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 265 printf("[%d] MatStashExpand ends, incr %d, space %p, space->val %p\n",rank,incr,stash->space,(stash->space)->val); 266 #endif 267 #ifdef MV 268 ierr = PetscMalloc((newnmax)*(2*sizeof(PetscInt)+bs2*sizeof(MatScalar)),&n_array);CHKERRQ(ierr); 269 n_idx = (PetscInt*)(n_array + bs2*newnmax); 270 n_idy = (PetscInt*)(n_idx + newnmax); 271 ierr = PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(MatScalar));CHKERRQ(ierr); 272 ierr = PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(PetscInt));CHKERRQ(ierr); 273 ierr = PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(PetscInt));CHKERRQ(ierr); 274 if (stash->array) {ierr = PetscFree(stash->array);CHKERRQ(ierr);} 275 stash->array = n_array; 276 stash->idx = n_idx; 277 stash->idy = n_idy; 278 #endif /* MV */ 279 stash->reallocs++; 280 stash->nmax = newnmax; 281 PetscFunctionReturn(0); 282 } 283 /* 284 MatStashValuesRow_Private - inserts values into the stash. This function 285 expects the values to be roworiented. Multiple columns belong to the same row 286 can be inserted with a single call to this function. 287 288 Input Parameters: 289 stash - the stash 290 row - the global row correspoiding to the values 291 n - the number of elements inserted. All elements belong to the above row. 292 idxn - the global column indices corresponding to each of the values. 293 values - the values inserted 294 */ 295 #undef __FUNCT__ 296 #define __FUNCT__ "MatStashValuesRow_Private" 297 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[]) 298 { 299 PetscErrorCode ierr; 300 PetscInt i,k; 301 PetscMatStashSpace space=stash->space; 302 303 PetscFunctionBegin; 304 #ifdef MV 305 PetscMPIInt rank; 306 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 307 #endif 308 /* Check and see if we have sufficient memory */ 309 if (!space || space->local_remaining < n){ 310 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 311 } 312 space = stash->space; 313 #ifdef MV 314 if (rank == 1){ 315 printf(" [%d] MatStashValuesRow, local_remaining %d, n %d\n",rank,space->local_remaining,n); 316 } 317 /* printf("space %p, stash %d values, local_used %d\n",space,n,space->local_used); */ 318 #endif 319 k = space->local_used; 320 for (i=0; i<n; i++) { 321 space->idx[k] = row; 322 space->idy[k] = idxn[i]; 323 space->val[k] = values[i]; 324 k++; 325 #ifdef MV 326 stash->idx[stash->n] = row; 327 stash->idy[stash->n] = idxn[i]; 328 stash->array[stash->n] = values[i]; 329 #endif 330 stash->n++; 331 } 332 /* stash->n += n; */ 333 space->local_used += n; 334 space->local_remaining -= n; 335 PetscFunctionReturn(0); 336 } 337 338 /* 339 MatStashValuesCol_Private - inserts values into the stash. This function 340 expects the values to be columnoriented. Multiple columns belong to the same row 341 can be inserted with a single call to this function. 342 343 Input Parameters: 344 stash - the stash 345 row - the global row correspoiding to the values 346 n - the number of elements inserted. All elements belong to the above row. 347 idxn - the global column indices corresponding to each of the values. 348 values - the values inserted 349 stepval - the consecutive values are sepated by a distance of stepval. 350 this happens because the input is columnoriented. 351 */ 352 #undef __FUNCT__ 353 #define __FUNCT__ "MatStashValuesCol_Private" 354 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt stepval) 355 { 356 PetscErrorCode ierr; 357 PetscInt i,k; 358 PetscMatStashSpace space=stash->space; 359 360 PetscFunctionBegin; 361 /* Check and see if we have sufficient memory */ 362 if (!space || space->local_remaining < n){ 363 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 364 } 365 space = stash->space; 366 k = space->local_used; 367 for (i=0; i<n; i++) { 368 space->idx[k] = row; 369 space->idy[k] = idxn[i]; 370 space->val[k] = values[i*stepval]; 371 k++; 372 #ifdef MV 373 stash->idx[stash->n] = row; 374 stash->idy[stash->n] = idxn[i]; 375 stash->array[stash->n] = values[i*stepval]; 376 #endif 377 stash->n++; 378 } 379 /* stash->n += n; */ 380 space->local_used += n; 381 space->local_remaining -= n; 382 PetscFunctionReturn(0); 383 } 384 385 /* 386 MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 387 This function expects the values to be roworiented. Multiple columns belong 388 to the same block-row can be inserted with a single call to this function. 389 This function extracts the sub-block of values based on the dimensions of 390 the original input block, and the row,col values corresponding to the blocks. 391 392 Input Parameters: 393 stash - the stash 394 row - the global block-row correspoiding to the values 395 n - the number of elements inserted. All elements belong to the above row. 396 idxn - the global block-column indices corresponding to each of the blocks of 397 values. Each block is of size bs*bs. 398 values - the values inserted 399 rmax - the number of block-rows in the original block. 400 cmax - the number of block-columsn on the original block. 401 idx - the index of the current block-row in the original block. 402 */ 403 #undef __FUNCT__ 404 #define __FUNCT__ "MatStashValuesRowBlocked_Private" 405 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 406 { 407 PetscErrorCode ierr; 408 PetscInt i,j,k,bs2,bs=stash->bs,l; 409 const MatScalar *vals; 410 MatScalar *array; 411 PetscMatStashSpace space=stash->space; 412 413 PetscFunctionBegin; 414 if (!space || space->local_remaining < n){ 415 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 416 } 417 space = stash->space; 418 l = space->local_used; 419 bs2 = bs*bs; 420 for (i=0; i<n; i++) { 421 space->idx[l] = row; 422 space->idy[l] = idxn[i]; 423 /* Now copy over the block of values. Store the values column oriented. 424 This enables inserting multiple blocks belonging to a row with a single 425 funtion call */ 426 array = space->val + bs2*l; 427 vals = values + idx*bs2*n + bs*i; 428 for (j=0; j<bs; j++) { 429 for (k=0; k<bs; k++) array[k*bs] = vals[k]; 430 array++; 431 vals += cmax*bs; 432 } 433 l++; 434 #ifdef MV 435 stash->idx[stash->n] = row; 436 stash->idy[stash->n] = idxn[i]; 437 /* Now copy over the block of values. Store the values column oriented. 438 This enables inserting multiple blocks belonging to a row with a single 439 funtion call */ 440 array = stash->array + bs2*stash->n; 441 vals = values + idx*bs2*n + bs*i; 442 for (j=0; j<bs; j++) { 443 for (k=0; k<bs; k++) {array[k*bs] = vals[k];} 444 array += 1; 445 vals += cmax*bs; 446 } 447 #endif 448 stash->n++; 449 } 450 space->local_used += n; 451 space->local_remaining -= n; 452 PetscFunctionReturn(0); 453 } 454 455 /* 456 MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 457 This function expects the values to be roworiented. Multiple columns belong 458 to the same block-row can be inserted with a single call to this function. 459 This function extracts the sub-block of values based on the dimensions of 460 the original input block, and the row,col values corresponding to the blocks. 461 462 Input Parameters: 463 stash - the stash 464 row - the global block-row correspoiding to the values 465 n - the number of elements inserted. All elements belong to the above row. 466 idxn - the global block-column indices corresponding to each of the blocks of 467 values. Each block is of size bs*bs. 468 values - the values inserted 469 rmax - the number of block-rows in the original block. 470 cmax - the number of block-columsn on the original block. 471 idx - the index of the current block-row in the original block. 472 */ 473 #undef __FUNCT__ 474 #define __FUNCT__ "MatStashValuesColBlocked_Private" 475 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 476 { 477 PetscErrorCode ierr; 478 PetscInt i,j,k,bs2,bs=stash->bs,l; 479 const MatScalar *vals; 480 MatScalar *array; 481 PetscMatStashSpace space=stash->space; 482 483 PetscFunctionBegin; 484 if (!space || space->local_remaining < n){ 485 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 486 } 487 space = stash->space; 488 l = space->local_used; 489 bs2 = bs*bs; 490 for (i=0; i<n; i++) { 491 space->idx[l] = row; 492 space->idy[l] = idxn[i]; 493 /* Now copy over the block of values. Store the values column oriented. 494 This enables inserting multiple blocks belonging to a row with a single 495 funtion call */ 496 array = space->val + bs2*l; 497 vals = values + idx*bs2*n + bs*i; 498 for (j=0; j<bs; j++) { 499 for (k=0; k<bs; k++) {array[k] = vals[k];} 500 array += bs; 501 vals += rmax*bs; 502 } 503 #ifdef MV 504 stash->idx[stash->n] = row; 505 stash->idy[stash->n] = idxn[i]; 506 /* Now copy over the block of values. Store the values column oriented. 507 This enables inserting multiple blocks belonging to a row with a single 508 funtion call */ 509 array = stash->array + bs2*stash->n; 510 vals = values + idx*bs + bs2*rmax*i; 511 for (j=0; j<bs; j++) { 512 for (k=0; k<bs; k++) {array[k] = vals[k];} 513 array += bs; 514 vals += rmax*bs; 515 } 516 #endif 517 stash->n++; 518 } 519 space->local_used += n; 520 space->local_remaining -= n; 521 PetscFunctionReturn(0); 522 } 523 /* 524 MatStashScatterBegin_Private - Initiates the transfer of values to the 525 correct owners. This function goes through the stash, and check the 526 owners of each stashed value, and sends the values off to the owner 527 processors. 528 529 Input Parameters: 530 stash - the stash 531 owners - an array of size 'no-of-procs' which gives the ownership range 532 for each node. 533 534 Notes: The 'owners' array in the cased of the blocked-stash has the 535 ranges specified blocked global indices, and for the regular stash in 536 the proper global indices. 537 */ 538 #undef __FUNCT__ 539 #define __FUNCT__ "MatStashScatterBegin_Private" 540 PetscErrorCode MatStashScatterBegin_Private(MatStash *stash,PetscInt *owners) 541 { 542 PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 543 PetscInt size=stash->size,nsends; 544 PetscErrorCode ierr; 545 PetscInt count,*sindices,**rindices,i,j,idx,lastidx,l; 546 MatScalar **rvalues,*svalues; 547 MPI_Comm comm = stash->comm; 548 MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 549 PetscMPIInt *nprocs,*nlengths,nreceives; 550 PetscInt *idx_ptr,*idy,n=stash->n; 551 MatScalar *val; 552 PetscMatStashSpace space,space_next,space_head=stash->space_head; 553 554 PetscFunctionBegin; 555 bs2 = stash->bs*stash->bs; 556 #ifdef MV 557 PetscMPIInt rank; 558 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 559 #endif 560 561 #ifdef MV 562 /* Copy values of space into val, idx, idy, and destroy space */ 563 ierr = PetscMalloc((n+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&val);CHKERRQ(ierr); 564 idx_ptr = (PetscInt*)(val + bs2*n); 565 idy = (PetscInt*)(idx_ptr + n); 566 ierr = PetscMatStashSpaceContiguous(bs2,&stash->space_head,val,idx_ptr,idy);CHKERRQ(ierr); 567 /* printf("[%d] Before and after SpaceContiguous, space->head %p/%p\n",rank,space_head,stash->space_head); */ 568 ierr = PetscFree(val);CHKERRQ(ierr); 569 #endif 570 #ifdef MV 571 printf("[%d] compare array with val ...\n",rank); 572 PetscScalar *array,*valtmp; 573 for (i=0; i<stash->n; i++){ 574 array = stash->array +i*bs2; 575 valtmp = val+i*bs2; 576 for (j=0; j<bs2; j++){ 577 if (*array != *valtmp) SETERRQ3(PETSC_ERR_ARG_SIZ, "%d, array %g != val %g",i,*array,*valtmp); 578 array++; valtmp++; 579 } 580 if (stash->idx[i] != idx_ptr[i]) SETERRQ3(PETSC_ERR_ARG_SIZ, "%d, array %d != idx %d",i,stash->idx[i],idx_ptr[i]); 581 if (stash->idy[i] != idy[i]) SETERRQ3(PETSC_ERR_ARG_SIZ, "%d, array %d != idy %d",i,stash->idy[i],idy[i]); 582 } 583 #endif 584 585 /* first count number of contributors to each processor */ 586 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr); 587 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscMPIInt));CHKERRQ(ierr); 588 ierr = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); 589 590 nlengths = nprocs+size; 591 i = j = 0; 592 lastidx = -1; 593 space = space_head; 594 while (space != PETSC_NULL){ 595 space_next = space->next; 596 for (l=0; l<space->local_used; l++){ 597 idx=space->idx[l]; 598 /* if indices are NOT locally sorted, need to start search at the beginning */ 599 if (lastidx > idx) j = 0; /* idx = stash->idx[i] */ 600 lastidx = idx; 601 for (; j<size; j++) { 602 if (idx >= owners[j] && idx < owners[j+1]) { 603 nlengths[j]++; owner[i] = j; break; 604 } 605 } 606 i++; 607 } 608 space = space_next; 609 } 610 /* Now check what procs get messages - and compute nsends. */ 611 for (i=0, nsends=0 ; i<size; i++) { 612 if (nlengths[i]) { nprocs[i] = 1; nsends ++;} 613 } 614 615 { int *onodes,*olengths; 616 /* Determine the number of messages to expect, their lengths, from from-ids */ 617 ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr); 618 ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 619 /* since clubbing row,col - lengths are multiplied by 2 */ 620 for (i=0; i<nreceives; i++) olengths[i] *=2; 621 ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 622 /* values are size 'bs2' lengths (and remove earlier factor 2 */ 623 for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 624 ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 625 ierr = PetscFree(onodes);CHKERRQ(ierr); 626 ierr = PetscFree(olengths);CHKERRQ(ierr); 627 } 628 629 /* do sends: 630 1) starts[i] gives the starting index in svalues for stuff going to 631 the ith processor 632 */ 633 ierr = PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&svalues);CHKERRQ(ierr); 634 sindices = (PetscInt*)(svalues + bs2*stash->n); 635 ierr = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 636 ierr = PetscMalloc(2*size*sizeof(PetscInt),&startv);CHKERRQ(ierr); 637 starti = startv + size; 638 /* use 2 sends the first with all_a, the next with all_i and all_j */ 639 startv[0] = 0; starti[0] = 0; 640 for (i=1; i<size; i++) { 641 startv[i] = startv[i-1] + nlengths[i-1]; 642 starti[i] = starti[i-1] + nlengths[i-1]*2; 643 } 644 645 i = 0; 646 space = space_head; 647 while (space != PETSC_NULL){ 648 space_next = space->next; 649 for (l=0; l<space->local_used; l++){ 650 j = owner[i]; 651 if (bs2 == 1) { 652 svalues[startv[j]] = space->val[l]; /* = stash->array[i]; */ 653 } else { 654 PetscInt k; 655 MatScalar *buf1,*buf2; 656 buf1 = svalues+bs2*startv[j]; 657 buf2 = space->val + bs2*i; /* stash->array+bs2*i; */ 658 for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; } 659 } 660 sindices[starti[j]] = space->idx[l]; /* stash->idx[i]; */ 661 sindices[starti[j]+nlengths[j]] = space->idy[l]; /* stash->idy[i]; */ 662 startv[j]++; 663 starti[j]++; 664 i++; 665 } 666 space = space_next; 667 } 668 startv[0] = 0; 669 for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];} 670 671 for (i=0,count=0; i<size; i++) { 672 if (nprocs[i]) { 673 ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr); 674 ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_MATSCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr); 675 } 676 } 677 #if defined(PETSC_USE_INFO) 678 ierr = PetscInfo1(0,"No of messages: %d \n",nsends);CHKERRQ(ierr); 679 for (i=0; i<size; i++) { 680 if (nprocs[i]) { 681 ierr = PetscInfo2(0,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(MatScalar)+2*sizeof(PetscInt));CHKERRQ(ierr); 682 } 683 } 684 #endif 685 ierr = PetscFree(owner);CHKERRQ(ierr); 686 ierr = PetscFree(startv);CHKERRQ(ierr); 687 /* This memory is reused in scatter end for a different purpose*/ 688 for (i=0; i<2*size; i++) nprocs[i] = -1; 689 stash->nprocs = nprocs; 690 691 /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 692 ierr = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 693 694 for (i=0; i<nreceives; i++) { 695 recv_waits[2*i] = recv_waits1[i]; 696 recv_waits[2*i+1] = recv_waits2[i]; 697 } 698 stash->recv_waits = recv_waits; 699 ierr = PetscFree(recv_waits1);CHKERRQ(ierr); 700 ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 701 702 stash->svalues = svalues; stash->rvalues = rvalues; 703 stash->rindices = rindices; stash->send_waits = send_waits; 704 stash->nsends = nsends; stash->nrecvs = nreceives; 705 PetscFunctionReturn(0); 706 } 707 708 /* 709 MatStashScatterGetMesg_Private - This function waits on the receives posted 710 in the function MatStashScatterBegin_Private() and returns one message at 711 a time to the calling function. If no messages are left, it indicates this 712 by setting flg = 0, else it sets flg = 1. 713 714 Input Parameters: 715 stash - the stash 716 717 Output Parameters: 718 nvals - the number of entries in the current message. 719 rows - an array of row indices (or blocked indices) corresponding to the values 720 cols - an array of columnindices (or blocked indices) corresponding to the values 721 vals - the values 722 flg - 0 indicates no more message left, and the current call has no values associated. 723 1 indicates that the current call successfully received a message, and the 724 other output parameters nvals,rows,cols,vals are set appropriately. 725 */ 726 #undef __FUNCT__ 727 #define __FUNCT__ "MatStashScatterGetMesg_Private" 728 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,MatScalar **vals,PetscInt *flg) 729 { 730 PetscErrorCode ierr; 731 PetscMPIInt i,*flg_v,i1,i2; 732 PetscInt bs2; 733 MPI_Status recv_status; 734 PetscTruth match_found = PETSC_FALSE; 735 736 PetscFunctionBegin; 737 738 *flg = 0; /* When a message is discovered this is reset to 1 */ 739 /* Return if no more messages to process */ 740 if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); } 741 742 flg_v = stash->nprocs; 743 bs2 = stash->bs*stash->bs; 744 /* If a matching pair of receieves are found, process them, and return the data to 745 the calling function. Until then keep receiving messages */ 746 while (!match_found) { 747 ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 748 /* Now pack the received message into a structure which is useable by others */ 749 if (i % 2) { 750 ierr = MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);CHKERRQ(ierr); 751 flg_v[2*recv_status.MPI_SOURCE] = i/2; 752 *nvals = *nvals/bs2; 753 } else { 754 ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr); 755 flg_v[2*recv_status.MPI_SOURCE+1] = i/2; 756 *nvals = *nvals/2; /* This message has both row indices and col indices */ 757 } 758 759 /* Check if we have both the messages from this proc */ 760 i1 = flg_v[2*recv_status.MPI_SOURCE]; 761 i2 = flg_v[2*recv_status.MPI_SOURCE+1]; 762 if (i1 != -1 && i2 != -1) { 763 *rows = stash->rindices[i2]; 764 *cols = *rows + *nvals; 765 *vals = stash->rvalues[i1]; 766 *flg = 1; 767 stash->nprocessed ++; 768 match_found = PETSC_TRUE; 769 } 770 } 771 PetscFunctionReturn(0); 772 } 773