1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: matstash.c,v 1.27 1999/03/18 00:33:52 balay Exp balay $"; 3 #endif 4 5 #include "src/mat/matimpl.h" 6 7 #define DEFAULT_STASH_SIZE 10000 8 9 /* 10 MatStashCreate_Private - Creates a stash ,currently used for all the parallel 11 matrix implementations. The stash is where elements of a matrix destined 12 to be stored on other processors are kept until matrix assembly is done. 13 14 This is a simple minded stash. Simply adds entries to end of stash. 15 16 Input Parameters: 17 comm - communicator, required for scatters. 18 bs - stash block size. used when stashing blocks of values 19 20 Output Parameters: 21 stash - the newly created stash 22 */ 23 #undef __FUNC__ 24 #define __FUNC__ "MatStashCreate_Private" 25 int MatStashCreate_Private(MPI_Comm comm,int bs, MatStash *stash) 26 { 27 int ierr,flg,max=DEFAULT_STASH_SIZE; 28 29 PetscFunctionBegin; 30 /* Require 2 tags, get the second using PetscCommGetNewTag() */ 31 ierr = PetscCommDuplicate_Private(comm,&stash->comm,&stash->tag1);CHKERRQ(ierr); 32 ierr = PetscCommGetNewTag(stash->comm,&stash->tag2); CHKERRQ(ierr); 33 ierr = OptionsGetInt(PETSC_NULL,"-matstash_initial_size",&max,&flg);CHKERRQ(ierr); 34 ierr = MatStashSetInitialSize_Private(stash,max); CHKERRQ(ierr); 35 ierr = MPI_Comm_size(stash->comm,&stash->size); CHKERRQ(ierr); 36 ierr = MPI_Comm_rank(stash->comm,&stash->rank); CHKERRQ(ierr); 37 38 if (bs <= 0) bs = 1; 39 40 stash->bs = bs; 41 stash->nmax = 0; 42 stash->n = 0; 43 stash->reallocs = -1; 44 stash->idx = 0; 45 stash->idy = 0; 46 stash->array = 0; 47 48 stash->send_waits = 0; 49 stash->recv_waits = 0; 50 stash->send_status = 0; 51 stash->nsends = 0; 52 stash->nrecvs = 0; 53 stash->svalues = 0; 54 stash->rvalues = 0; 55 stash->rmax = 0; 56 stash->nprocs = 0; 57 stash->nprocessed = 0; 58 PetscFunctionReturn(0); 59 } 60 61 /* 62 MatStashDestroy_Private - Destroy the stash 63 */ 64 #undef __FUNC__ 65 #define __FUNC__ "MatStashDestroy_Private" 66 int MatStashDestroy_Private(MatStash *stash) 67 { 68 int ierr; 69 70 PetscFunctionBegin; 71 ierr = PetscCommDestroy_Private(&stash->comm); CHKERRQ(ierr); 72 if (stash->array) {PetscFree(stash->array); stash->array = 0;} 73 PetscFunctionReturn(0); 74 } 75 76 /* 77 MatStashScatterEnd_Private - This is called as the fial stage of 78 scatter. The final stages of messagepassing is done here, and 79 all the memory used for messagepassing is cleanedu up. This 80 routine also resets the stash, and deallocates the memory used 81 for the stash. It also keeps track of the current memory usage 82 so that the same value can be used the next time through. 83 */ 84 #undef __FUNC__ 85 #define __FUNC__ "MatStashScatterEnd_Private" 86 int MatStashScatterEnd_Private(MatStash *stash) 87 { 88 int nsends=stash->nsends,ierr,bs2; 89 MPI_Status *send_status; 90 91 PetscFunctionBegin; 92 /* wait on sends */ 93 if (nsends) { 94 send_status = (MPI_Status *)PetscMalloc(2*nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 95 ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 96 PetscFree(send_status); 97 } 98 99 /* Now update nmaxold to be app 10% more than max n used, this way the 100 wastage of space is reduced the next time this stash is used */ 101 bs2 = stash->bs*stash->bs; 102 stash->oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 103 stash->nmax = 0; 104 stash->n = 0; 105 stash->reallocs = -1; 106 stash->rmax = 0; 107 stash->nprocessed = 0; 108 109 if (stash->array) { 110 PetscFree(stash->array); 111 stash->array = 0; 112 stash->idx = 0; 113 stash->idy = 0; 114 } 115 if (stash->send_waits) {PetscFree(stash->send_waits);stash->send_waits = 0;} 116 if (stash->recv_waits) {PetscFree(stash->recv_waits);stash->recv_waits = 0;} 117 if (stash->svalues) {PetscFree(stash->svalues);stash->svalues = 0;} 118 if (stash->rvalues) {PetscFree(stash->rvalues); stash->rvalues = 0;} 119 if (stash->nprocs) {PetscFree(stash->nprocs); stash->nprocs = 0;} 120 121 PetscFunctionReturn(0); 122 } 123 124 /* 125 MatStashGetInfo_Private - Gets the relavant statistics of the stash 126 127 Input Parameters: 128 stash - the stash 129 nstash - the size of the stash. Indicates the number of values stored. 130 reallocs - the number of additional mallocs incurred. 131 132 */ 133 #undef __FUNC__ 134 #define __FUNC__ "MatStashGetInfo_Private" 135 int MatStashGetInfo_Private(MatStash *stash,int *nstash, int *reallocs) 136 { 137 int bs2 = stash->bs*stash->bs; 138 139 PetscFunctionBegin; 140 *nstash = stash->n*bs2; 141 *reallocs = stash->reallocs; 142 PetscFunctionReturn(0); 143 } 144 145 146 /* 147 MatStashSetInitialSize_Private - Sets the initial size of the stash 148 149 Input Parameters: 150 stash - the stash 151 max - the value that is used as the max size of the stash. 152 this value is used while allocating memory. 153 */ 154 #undef __FUNC__ 155 #define __FUNC__ "MatStashSetInitialSize_Private" 156 int MatStashSetInitialSize_Private(MatStash *stash,int max) 157 { 158 PetscFunctionBegin; 159 stash->oldnmax = max; 160 stash->nmax = 0; 161 PetscFunctionReturn(0); 162 } 163 164 /* MatStashExpand_Private - Expand the stash. This function is called 165 when the space in the stash is not sufficient to add the new values 166 being inserted into the stash. 167 168 Input Parameters: 169 stash - the stash 170 incr - the minimum increase requested 171 172 Notes: 173 This routine doubles the currently used memory. 174 */ 175 #undef __FUNC__ 176 #define __FUNC__ "MatStashExpand_Private" 177 static int MatStashExpand_Private(MatStash *stash,int incr) 178 { 179 int *n_idx,*n_idy,newnmax,bs2; 180 Scalar *n_array; 181 182 PetscFunctionBegin; 183 /* allocate a larger stash */ 184 bs2 = stash->bs*stash->bs; 185 if (stash->nmax == 0) newnmax = stash->oldnmax/bs2; 186 else newnmax = stash->nmax*2; 187 if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 188 189 n_array = (Scalar *)PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(Scalar)));CHKPTRQ(n_array); 190 n_idx = (int *) (n_array + bs2*newnmax); 191 n_idy = (int *) (n_idx + newnmax); 192 PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(Scalar)); 193 PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int)); 194 PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int)); 195 if (stash->array) PetscFree(stash->array); 196 stash->array = n_array; 197 stash->idx = n_idx; 198 stash->idy = n_idy; 199 stash->nmax = newnmax; 200 stash->oldnmax = newnmax*bs2; 201 stash->reallocs++; 202 PetscFunctionReturn(0); 203 } 204 /* 205 MatStashValuesRow_Private - inserts values into the stash. This function 206 expects the values to be roworiented. Multiple columns belong to the same row 207 can be inserted with a single call to this function. 208 209 Input Parameters: 210 stash - the stash 211 row - the global row correspoiding to the values 212 n - the number of elements inserted. All elements belong to the above row. 213 idxn - the global column indices corresponding to each of the values. 214 values - the values inserted 215 */ 216 #undef __FUNC__ 217 #define __FUNC__ "MatStashValuesRow_Private" 218 int MatStashValuesRow_Private(MatStash *stash,int row,int n, int *idxn,Scalar *values) 219 { 220 int ierr,i; 221 222 PetscFunctionBegin; 223 /* Check and see if we have sufficient memory */ 224 if ((stash->n + n) > stash->nmax) { 225 ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr); 226 } 227 for ( i=0; i<n; i++ ) { 228 stash->idx[stash->n] = row; 229 stash->idy[stash->n] = idxn[i]; 230 stash->array[stash->n] = values[i]; 231 stash->n++; 232 } 233 PetscFunctionReturn(0); 234 } 235 /* 236 MatStashValuesCol_Private - inserts values into the stash. This function 237 expects the values to be columnoriented. Multiple columns belong to the same row 238 can be inserted with a single call to this function. 239 240 Input Parameters: 241 stash - the stash 242 row - the global row correspoiding to the values 243 n - the number of elements inserted. All elements belong to the above row. 244 idxn - the global column indices corresponding to each of the values. 245 values - the values inserted 246 stepval - the consecutive values are sepated by a distance of stepval. 247 this happens because the input is columnoriented. 248 */ 249 #undef __FUNC__ 250 #define __FUNC__ "MatStashValuesCol_Private" 251 int MatStashValuesCol_Private(MatStash *stash,int row,int n, int *idxn, 252 Scalar *values,int stepval) 253 { 254 int ierr,i; 255 256 PetscFunctionBegin; 257 /* Check and see if we have sufficient memory */ 258 if ((stash->n + n) > stash->nmax) { 259 ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr); 260 } 261 for ( i=0; i<n; i++ ) { 262 stash->idx[stash->n] = row; 263 stash->idy[stash->n] = idxn[i]; 264 stash->array[stash->n] = values[i*stepval]; 265 stash->n++; 266 } 267 PetscFunctionReturn(0); 268 } 269 270 /* 271 MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 272 This function expects the values to be roworiented. Multiple columns belong 273 to the same block-row can be inserted with a single call to this function. 274 This function extracts the sub-block of values based on the dimensions of 275 the original input block, and the row,col values corresponding to the blocks. 276 277 Input Parameters: 278 stash - the stash 279 row - the global block-row correspoiding to the values 280 n - the number of elements inserted. All elements belong to the above row. 281 idxn - the global block-column indices corresponding to each of the blocks of 282 values. Each block is of size bs*bs. 283 values - the values inserted 284 rmax - the number of block-rows in the original block. 285 cmax - the number of block-columsn on the original block. 286 idx - the index of the current block-row in the original block. 287 */ 288 #undef __FUNC__ 289 #define __FUNC__ "MatStashValuesRowBlocked_Private" 290 int MatStashValuesRowBlocked_Private(MatStash *stash,int row,int n,int *idxn,Scalar *values, 291 int rmax,int cmax,int idx) 292 { 293 int ierr,i,j,k,bs2,bs=stash->bs; 294 Scalar *vals,*array; 295 296 PetscFunctionBegin; 297 bs2 = bs*bs; 298 if ((stash->n+n) > stash->nmax) { 299 ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr); 300 } 301 for ( i=0; i<n; i++ ) { 302 stash->idx[stash->n] = row; 303 stash->idy[stash->n] = idxn[i]; 304 /* Now copy over the block of values. Store the values column oriented. 305 This enables inserting multiple blocks belonging to a row with a single 306 funtion call */ 307 array = stash->array + bs2*stash->n; 308 vals = values + idx*bs2*n + bs*i; 309 for ( j=0; j<bs; j++ ) { 310 for ( k=0; k<bs; k++ ) {array[k*bs] = vals[k];} 311 array += 1; 312 vals += cmax*bs; 313 } 314 stash->n++; 315 } 316 PetscFunctionReturn(0); 317 } 318 319 /* 320 MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 321 This function expects the values to be roworiented. Multiple columns belong 322 to the same block-row can be inserted with a single call to this function. 323 This function extracts the sub-block of values based on the dimensions of 324 the original input block, and the row,col values corresponding to the blocks. 325 326 Input Parameters: 327 stash - the stash 328 row - the global block-row correspoiding to the values 329 n - the number of elements inserted. All elements belong to the above row. 330 idxn - the global block-column indices corresponding to each of the blocks of 331 values. Each block is of size bs*bs. 332 values - the values inserted 333 rmax - the number of block-rows in the original block. 334 cmax - the number of block-columsn on the original block. 335 idx - the index of the current block-row in the original block. 336 */ 337 #undef __FUNC__ 338 #define __FUNC__ "MatStashValuesColBlocked_Private" 339 int MatStashValuesColBlocked_Private(MatStash *stash,int row,int n,int *idxn, 340 Scalar *values,int rmax,int cmax,int idx) 341 { 342 int ierr,i,j,k,bs2,bs=stash->bs; 343 Scalar *vals,*array; 344 345 PetscFunctionBegin; 346 bs2 = bs*bs; 347 if ((stash->n+n) > stash->nmax) { 348 ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr); 349 } 350 for ( i=0; i<n; i++ ) { 351 stash->idx[stash->n] = row; 352 stash->idy[stash->n] = idxn[i]; 353 /* Now copy over the block of values. Store the values column oriented. 354 This enables inserting multiple blocks belonging to a row with a single 355 funtion call */ 356 array = stash->array + bs2*stash->n; 357 vals = values + idx*bs + bs2*rmax*i; 358 for ( j=0; j<bs; j++ ) { 359 for ( k=0; k<bs; k++ ) {array[k] = vals[k];} 360 array += bs; 361 vals += rmax*bs; 362 } 363 stash->n++; 364 } 365 PetscFunctionReturn(0); 366 } 367 /* 368 MatStashScatterBegin_Private - Initiates the transfer of values to the 369 correct owners. This function goes through the stash, and check the 370 owners of each stashed value, and sends the values off to the owner 371 processors. 372 373 Input Parameters: 374 stash - the stash 375 owners - an array of size 'no-of-procs' which gives the ownership range 376 for each node. 377 378 Notes: The 'owners' array in the cased of the blocked-stash has the 379 ranges specified blocked global indices, and for the regular stash in 380 the proper global indices. 381 */ 382 #undef __FUNC__ 383 #define __FUNC__ "MatStashScatterBegin_Private" 384 int MatStashScatterBegin_Private(MatStash *stash,int *owners) 385 { 386 int *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 387 int rank=stash->rank,size=stash->size,*nprocs,*procs,nsends,nreceives; 388 int nmax,*work,count,ierr,*sindices,*rindices,i,j,idx; 389 Scalar *rvalues,*svalues; 390 MPI_Comm comm = stash->comm; 391 MPI_Request *send_waits,*recv_waits; 392 393 PetscFunctionBegin; 394 395 bs2 = stash->bs*stash->bs; 396 /* first count number of contributors to each processor */ 397 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 398 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 399 owner = (int *) PetscMalloc( (stash->n+1)*sizeof(int) ); CHKPTRQ(owner); 400 401 for ( i=0; i<stash->n; i++ ) { 402 idx = stash->idx[i]; 403 for ( j=0; j<size; j++ ) { 404 if (idx >= owners[j] && idx < owners[j+1]) { 405 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 406 } 407 } 408 } 409 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 410 411 /* inform other processors of number of messages and max length*/ 412 work = (int *)PetscMalloc(size*sizeof(int)); CHKPTRQ(work); 413 ierr = MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 414 nreceives = work[rank]; 415 ierr = MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 416 nmax = work[rank]; 417 PetscFree(work); 418 /* post receives: 419 since we don't know how long each individual message is we 420 allocate the largest needed buffer for each receive. Potentially 421 this is a lot of wasted space. 422 */ 423 rvalues = (Scalar *)PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(rvalues); 424 rindices = (int *) (rvalues + bs2*nreceives*nmax); 425 recv_waits = (MPI_Request *)PetscMalloc((nreceives+1)*2*sizeof(MPI_Request));CHKPTRQ(recv_waits); 426 for ( i=0,count=0; i<nreceives; i++ ) { 427 ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm, 428 recv_waits+count++); CHKERRQ(ierr); 429 ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm, 430 recv_waits+count++); CHKERRQ(ierr); 431 } 432 433 /* do sends: 434 1) starts[i] gives the starting index in svalues for stuff going to 435 the ith processor 436 */ 437 svalues = (Scalar *)PetscMalloc((stash->n+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(svalues); 438 sindices = (int *) (svalues + bs2*stash->n); 439 send_waits = (MPI_Request *) PetscMalloc(2*(nsends+1)*sizeof(MPI_Request)); 440 CHKPTRQ(send_waits); 441 startv = (int *) PetscMalloc(2*size*sizeof(int) ); CHKPTRQ(startv); 442 starti = startv + size; 443 /* use 2 sends the first with all_a, the next with all_i and all_j */ 444 startv[0] = 0; starti[0] = 0; 445 for ( i=1; i<size; i++ ) { 446 startv[i] = startv[i-1] + nprocs[i-1]; 447 starti[i] = starti[i-1] + nprocs[i-1]*2; 448 } 449 for ( i=0; i<stash->n; i++ ) { 450 j = owner[i]; 451 if (bs2 == 1) { 452 svalues[startv[j]] = stash->array[i]; 453 } else { 454 int k; 455 Scalar *buf1,*buf2; 456 buf1 = svalues+bs2*startv[j]; 457 buf2 = stash->array+bs2*i; 458 for ( k=0; k<bs2; k++ ){ buf1[k] = buf2[k]; } 459 } 460 sindices[starti[j]] = stash->idx[i]; 461 sindices[starti[j]+nprocs[j]] = stash->idy[i]; 462 startv[j]++; 463 starti[j]++; 464 } 465 startv[0] = 0; 466 for ( i=1; i<size; i++ ) { startv[i] = startv[i-1] + nprocs[i-1];} 467 for ( i=0,count=0; i<size; i++ ) { 468 if (procs[i]) { 469 ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[i],MPIU_SCALAR,i,tag1,comm, 470 send_waits+count++);CHKERRQ(ierr); 471 ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[i],MPI_INT,i,tag2,comm, 472 send_waits+count++);CHKERRQ(ierr); 473 } 474 } 475 PetscFree(owner); 476 PetscFree(startv); 477 /* This memory is reused in scatter end for a different purpose*/ 478 for (i=0; i<2*size; i++ ) nprocs[i] = -1; 479 stash->nprocs = nprocs; 480 481 stash->svalues = svalues; stash->rvalues = rvalues; 482 stash->nsends = nsends; stash->nrecvs = nreceives; 483 stash->send_waits = send_waits; stash->recv_waits = recv_waits; 484 stash->rmax = nmax; 485 PetscFunctionReturn(0); 486 } 487 488 /* 489 MatStashScatterGetMesg_Private - This function waits on the receives posted 490 in the function MatStashScatterBegin_Private() and returns one message at 491 a time to the calling function. If no messages are left, it indicates this 492 by setting flg = 0, else it sets flg = 1. 493 494 Input Parameters: 495 stash - the stash 496 497 Output Parameters: 498 nvals - the number of entries in the current message. 499 rows - an array of row indices (or blocked indices) corresponding to the values 500 cols - an array of columnindices (or blocked indices) corresponding to the values 501 vals - the values 502 flg - 0 indicates no more message left, and the current call has no values associated. 503 1 indicates that the current call successfully received a message, and the 504 other output parameters nvals,rows,cols,vals are set appropriately. 505 */ 506 #undef __FUNC__ 507 #define __FUNC__ "MatStashScatterGetMesg_Private" 508 int MatStashScatterGetMesg_Private(MatStash *stash,int *nvals,int **rows,int** cols,Scalar **vals,int *flg) 509 { 510 int i,ierr,size=stash->size,*flg_v,*flg_i; 511 int i1,i2,*rindices,match_found=0,bs2; 512 MPI_Status recv_status; 513 514 PetscFunctionBegin; 515 516 *flg = 0; /* When a message is discovered this is reset to 1 */ 517 /* Return if no more messages to process */ 518 if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); } 519 520 flg_v = stash->nprocs; 521 flg_i = flg_v + size; 522 bs2 = stash->bs*stash->bs; 523 /* If a matching pair of receieves are found, process them, and return the data to 524 the calling function. Until then keep receiving messages */ 525 while (!match_found) { 526 ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 527 /* Now pack the received message into a structure which is useable by others */ 528 if (i % 2) { 529 ierr = MPI_Get_count(&recv_status,MPI_INT,nvals);CHKERRQ(ierr); 530 flg_i[recv_status.MPI_SOURCE] = i/2; 531 *nvals = *nvals/2; /* This message has both row indices and col indices */ 532 } else { 533 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr); 534 flg_v[recv_status.MPI_SOURCE] = i/2; 535 *nvals = *nvals/bs2; 536 } 537 538 /* Check if we have both the messages from this proc */ 539 i1 = flg_v[recv_status.MPI_SOURCE]; 540 i2 = flg_i[recv_status.MPI_SOURCE]; 541 if (i1 != -1 && i2 != -1) { 542 rindices = (int *) (stash->rvalues + bs2*stash->rmax*stash->nrecvs); 543 *rows = rindices + 2*i2*stash->rmax; 544 *cols = *rows + *nvals; 545 *vals = stash->rvalues + i1*bs2*stash->rmax; 546 *flg = 1; 547 stash->nprocessed ++; 548 match_found = 1; 549 } 550 } 551 PetscFunctionReturn(0); 552 } 553