1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: matstash.c,v 1.28 1999/03/18 01:26:13 balay Exp balay $"; 3 #endif 4 5 #include "src/mat/matimpl.h" 6 7 #define DEFAULT_STASH_SIZE 10000 8 9 /* 10 MatStashCreate_Private - Creates a stash ,currently used for all the parallel 11 matrix implementations. The stash is where elements of a matrix destined 12 to be stored on other processors are kept until matrix assembly is done. 13 14 This is a simple minded stash. Simply adds entries to end of stash. 15 16 Input Parameters: 17 comm - communicator, required for scatters. 18 bs - stash block size. used when stashing blocks of values 19 20 Output Parameters: 21 stash - the newly created stash 22 */ 23 #undef __FUNC__ 24 #define __FUNC__ "MatStashCreate_Private" 25 int MatStashCreate_Private(MPI_Comm comm,int bs, MatStash *stash) 26 { 27 int ierr,flg,max,*opt,nopt; 28 29 PetscFunctionBegin; 30 /* Require 2 tags, get the second using PetscCommGetNewTag() */ 31 ierr = PetscCommDuplicate_Private(comm,&stash->comm,&stash->tag1);CHKERRQ(ierr); 32 ierr = PetscCommGetNewTag(stash->comm,&stash->tag2); CHKERRQ(ierr); 33 ierr = MPI_Comm_size(stash->comm,&stash->size); CHKERRQ(ierr); 34 ierr = MPI_Comm_rank(stash->comm,&stash->rank); CHKERRQ(ierr); 35 36 nopt = stash->size; 37 opt = (int*) PetscMalloc(nopt*sizeof(int)); CHKPTRQ(opt); 38 ierr = OptionsGetIntArray(PETSC_NULL,"-vecstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 39 if (flg) { 40 if (nopt == 1) max = opt[0]; 41 else if (nopt == stash->size) max = opt[stash->rank]; 42 else if (stash->rank < nopt) max = opt[stash->rank]; 43 /* else use the default */ 44 stash->umax = max; 45 } else { 46 stash->umax = 0; 47 } 48 PetscFree(opt); 49 if (bs <= 0) bs = 1; 50 51 stash->bs = bs; 52 stash->nmax = 0; 53 stash->oldnmax = 0; 54 stash->n = 0; 55 stash->reallocs = -1; 56 stash->idx = 0; 57 stash->idy = 0; 58 stash->array = 0; 59 60 stash->send_waits = 0; 61 stash->recv_waits = 0; 62 stash->send_status = 0; 63 stash->nsends = 0; 64 stash->nrecvs = 0; 65 stash->svalues = 0; 66 stash->rvalues = 0; 67 stash->rmax = 0; 68 stash->nprocs = 0; 69 stash->nprocessed = 0; 70 PetscFunctionReturn(0); 71 } 72 73 /* 74 MatStashDestroy_Private - Destroy the stash 75 */ 76 #undef __FUNC__ 77 #define __FUNC__ "MatStashDestroy_Private" 78 int MatStashDestroy_Private(MatStash *stash) 79 { 80 int ierr; 81 82 PetscFunctionBegin; 83 ierr = PetscCommDestroy_Private(&stash->comm); CHKERRQ(ierr); 84 if (stash->array) {PetscFree(stash->array); stash->array = 0;} 85 PetscFunctionReturn(0); 86 } 87 88 /* 89 MatStashScatterEnd_Private - This is called as the fial stage of 90 scatter. The final stages of messagepassing is done here, and 91 all the memory used for messagepassing is cleanedu up. This 92 routine also resets the stash, and deallocates the memory used 93 for the stash. It also keeps track of the current memory usage 94 so that the same value can be used the next time through. 95 */ 96 #undef __FUNC__ 97 #define __FUNC__ "MatStashScatterEnd_Private" 98 int MatStashScatterEnd_Private(MatStash *stash) 99 { 100 int nsends=stash->nsends,ierr,bs2,oldnmax; 101 MPI_Status *send_status; 102 103 PetscFunctionBegin; 104 /* wait on sends */ 105 if (nsends) { 106 send_status = (MPI_Status *)PetscMalloc(2*nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 107 ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 108 PetscFree(send_status); 109 } 110 111 /* Now update nmaxold to be app 10% more than max n used, this way the 112 wastage of space is reduced the next time this stash is used. 113 Also update the oldmax, only if it increases */ 114 bs2 = stash->bs*stash->bs; 115 oldnmax = ((int)(stash->n * 1.1) + 5)*stash->bs; 116 if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 117 118 stash->nmax = 0; 119 stash->n = 0; 120 stash->reallocs = -1; 121 stash->rmax = 0; 122 stash->nprocessed = 0; 123 124 if (stash->array) { 125 PetscFree(stash->array); 126 stash->array = 0; 127 stash->idx = 0; 128 stash->idy = 0; 129 } 130 if (stash->send_waits) {PetscFree(stash->send_waits);stash->send_waits = 0;} 131 if (stash->recv_waits) {PetscFree(stash->recv_waits);stash->recv_waits = 0;} 132 if (stash->svalues) {PetscFree(stash->svalues);stash->svalues = 0;} 133 if (stash->rvalues) {PetscFree(stash->rvalues); stash->rvalues = 0;} 134 if (stash->nprocs) {PetscFree(stash->nprocs); stash->nprocs = 0;} 135 136 PetscFunctionReturn(0); 137 } 138 139 /* 140 MatStashGetInfo_Private - Gets the relavant statistics of the stash 141 142 Input Parameters: 143 stash - the stash 144 nstash - the size of the stash. Indicates the number of values stored. 145 reallocs - the number of additional mallocs incurred. 146 147 */ 148 #undef __FUNC__ 149 #define __FUNC__ "MatStashGetInfo_Private" 150 int MatStashGetInfo_Private(MatStash *stash,int *nstash, int *reallocs) 151 { 152 int bs2 = stash->bs*stash->bs; 153 154 PetscFunctionBegin; 155 *nstash = stash->n*bs2; 156 if (stash->reallocs < 0) *reallocs = 0; 157 else *reallocs = stash->reallocs; 158 PetscFunctionReturn(0); 159 } 160 161 162 /* 163 MatStashSetInitialSize_Private - Sets the initial size of the stash 164 165 Input Parameters: 166 stash - the stash 167 max - the value that is used as the max size of the stash. 168 this value is used while allocating memory. 169 */ 170 #undef __FUNC__ 171 #define __FUNC__ "MatStashSetInitialSize_Private" 172 int MatStashSetInitialSize_Private(MatStash *stash,int max) 173 { 174 PetscFunctionBegin; 175 stash->umax = max; 176 PetscFunctionReturn(0); 177 } 178 179 /* MatStashExpand_Private - Expand the stash. This function is called 180 when the space in the stash is not sufficient to add the new values 181 being inserted into the stash. 182 183 Input Parameters: 184 stash - the stash 185 incr - the minimum increase requested 186 187 Notes: 188 This routine doubles the currently used memory. 189 */ 190 #undef __FUNC__ 191 #define __FUNC__ "MatStashExpand_Private" 192 static int MatStashExpand_Private(MatStash *stash,int incr) 193 { 194 int *n_idx,*n_idy,newnmax,bs2; 195 Scalar *n_array; 196 197 PetscFunctionBegin; 198 /* allocate a larger stash */ 199 bs2 = stash->bs*stash->bs; 200 if (stash->oldnmax == 0) { /* new stash */ 201 if (stash->umax) newnmax = stash->umax/bs2; 202 else newnmax = DEFAULT_STASH_SIZE/bs2; 203 } else if (stash->nmax == 0) { /* resuing stash */ 204 if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 205 else newnmax = stash->oldnmax/bs2; 206 } else newnmax = stash->nmax*2; 207 if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 208 209 n_array = (Scalar *)PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(Scalar)));CHKPTRQ(n_array); 210 n_idx = (int *) (n_array + bs2*newnmax); 211 n_idy = (int *) (n_idx + newnmax); 212 PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(Scalar)); 213 PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int)); 214 PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int)); 215 if (stash->array) PetscFree(stash->array); 216 stash->array = n_array; 217 stash->idx = n_idx; 218 stash->idy = n_idy; 219 stash->nmax = newnmax; 220 stash->oldnmax = newnmax*bs2; 221 stash->reallocs++; 222 PetscFunctionReturn(0); 223 } 224 /* 225 MatStashValuesRow_Private - inserts values into the stash. This function 226 expects the values to be roworiented. Multiple columns belong to the same row 227 can be inserted with a single call to this function. 228 229 Input Parameters: 230 stash - the stash 231 row - the global row correspoiding to the values 232 n - the number of elements inserted. All elements belong to the above row. 233 idxn - the global column indices corresponding to each of the values. 234 values - the values inserted 235 */ 236 #undef __FUNC__ 237 #define __FUNC__ "MatStashValuesRow_Private" 238 int MatStashValuesRow_Private(MatStash *stash,int row,int n, int *idxn,Scalar *values) 239 { 240 int ierr,i; 241 242 PetscFunctionBegin; 243 /* Check and see if we have sufficient memory */ 244 if ((stash->n + n) > stash->nmax) { 245 ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr); 246 } 247 for ( i=0; i<n; i++ ) { 248 stash->idx[stash->n] = row; 249 stash->idy[stash->n] = idxn[i]; 250 stash->array[stash->n] = values[i]; 251 stash->n++; 252 } 253 PetscFunctionReturn(0); 254 } 255 /* 256 MatStashValuesCol_Private - inserts values into the stash. This function 257 expects the values to be columnoriented. Multiple columns belong to the same row 258 can be inserted with a single call to this function. 259 260 Input Parameters: 261 stash - the stash 262 row - the global row correspoiding to the values 263 n - the number of elements inserted. All elements belong to the above row. 264 idxn - the global column indices corresponding to each of the values. 265 values - the values inserted 266 stepval - the consecutive values are sepated by a distance of stepval. 267 this happens because the input is columnoriented. 268 */ 269 #undef __FUNC__ 270 #define __FUNC__ "MatStashValuesCol_Private" 271 int MatStashValuesCol_Private(MatStash *stash,int row,int n, int *idxn, 272 Scalar *values,int stepval) 273 { 274 int ierr,i; 275 276 PetscFunctionBegin; 277 /* Check and see if we have sufficient memory */ 278 if ((stash->n + n) > stash->nmax) { 279 ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr); 280 } 281 for ( i=0; i<n; i++ ) { 282 stash->idx[stash->n] = row; 283 stash->idy[stash->n] = idxn[i]; 284 stash->array[stash->n] = values[i*stepval]; 285 stash->n++; 286 } 287 PetscFunctionReturn(0); 288 } 289 290 /* 291 MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 292 This function expects the values to be roworiented. Multiple columns belong 293 to the same block-row can be inserted with a single call to this function. 294 This function extracts the sub-block of values based on the dimensions of 295 the original input block, and the row,col values corresponding to the blocks. 296 297 Input Parameters: 298 stash - the stash 299 row - the global block-row correspoiding to the values 300 n - the number of elements inserted. All elements belong to the above row. 301 idxn - the global block-column indices corresponding to each of the blocks of 302 values. Each block is of size bs*bs. 303 values - the values inserted 304 rmax - the number of block-rows in the original block. 305 cmax - the number of block-columsn on the original block. 306 idx - the index of the current block-row in the original block. 307 */ 308 #undef __FUNC__ 309 #define __FUNC__ "MatStashValuesRowBlocked_Private" 310 int MatStashValuesRowBlocked_Private(MatStash *stash,int row,int n,int *idxn,Scalar *values, 311 int rmax,int cmax,int idx) 312 { 313 int ierr,i,j,k,bs2,bs=stash->bs; 314 Scalar *vals,*array; 315 316 PetscFunctionBegin; 317 bs2 = bs*bs; 318 if ((stash->n+n) > stash->nmax) { 319 ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr); 320 } 321 for ( i=0; i<n; i++ ) { 322 stash->idx[stash->n] = row; 323 stash->idy[stash->n] = idxn[i]; 324 /* Now copy over the block of values. Store the values column oriented. 325 This enables inserting multiple blocks belonging to a row with a single 326 funtion call */ 327 array = stash->array + bs2*stash->n; 328 vals = values + idx*bs2*n + bs*i; 329 for ( j=0; j<bs; j++ ) { 330 for ( k=0; k<bs; k++ ) {array[k*bs] = vals[k];} 331 array += 1; 332 vals += cmax*bs; 333 } 334 stash->n++; 335 } 336 PetscFunctionReturn(0); 337 } 338 339 /* 340 MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 341 This function expects the values to be roworiented. Multiple columns belong 342 to the same block-row can be inserted with a single call to this function. 343 This function extracts the sub-block of values based on the dimensions of 344 the original input block, and the row,col values corresponding to the blocks. 345 346 Input Parameters: 347 stash - the stash 348 row - the global block-row correspoiding to the values 349 n - the number of elements inserted. All elements belong to the above row. 350 idxn - the global block-column indices corresponding to each of the blocks of 351 values. Each block is of size bs*bs. 352 values - the values inserted 353 rmax - the number of block-rows in the original block. 354 cmax - the number of block-columsn on the original block. 355 idx - the index of the current block-row in the original block. 356 */ 357 #undef __FUNC__ 358 #define __FUNC__ "MatStashValuesColBlocked_Private" 359 int MatStashValuesColBlocked_Private(MatStash *stash,int row,int n,int *idxn, 360 Scalar *values,int rmax,int cmax,int idx) 361 { 362 int ierr,i,j,k,bs2,bs=stash->bs; 363 Scalar *vals,*array; 364 365 PetscFunctionBegin; 366 bs2 = bs*bs; 367 if ((stash->n+n) > stash->nmax) { 368 ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr); 369 } 370 for ( i=0; i<n; i++ ) { 371 stash->idx[stash->n] = row; 372 stash->idy[stash->n] = idxn[i]; 373 /* Now copy over the block of values. Store the values column oriented. 374 This enables inserting multiple blocks belonging to a row with a single 375 funtion call */ 376 array = stash->array + bs2*stash->n; 377 vals = values + idx*bs + bs2*rmax*i; 378 for ( j=0; j<bs; j++ ) { 379 for ( k=0; k<bs; k++ ) {array[k] = vals[k];} 380 array += bs; 381 vals += rmax*bs; 382 } 383 stash->n++; 384 } 385 PetscFunctionReturn(0); 386 } 387 /* 388 MatStashScatterBegin_Private - Initiates the transfer of values to the 389 correct owners. This function goes through the stash, and check the 390 owners of each stashed value, and sends the values off to the owner 391 processors. 392 393 Input Parameters: 394 stash - the stash 395 owners - an array of size 'no-of-procs' which gives the ownership range 396 for each node. 397 398 Notes: The 'owners' array in the cased of the blocked-stash has the 399 ranges specified blocked global indices, and for the regular stash in 400 the proper global indices. 401 */ 402 #undef __FUNC__ 403 #define __FUNC__ "MatStashScatterBegin_Private" 404 int MatStashScatterBegin_Private(MatStash *stash,int *owners) 405 { 406 int *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 407 int rank=stash->rank,size=stash->size,*nprocs,*procs,nsends,nreceives; 408 int nmax,*work,count,ierr,*sindices,*rindices,i,j,idx; 409 Scalar *rvalues,*svalues; 410 MPI_Comm comm = stash->comm; 411 MPI_Request *send_waits,*recv_waits; 412 413 PetscFunctionBegin; 414 415 bs2 = stash->bs*stash->bs; 416 /* first count number of contributors to each processor */ 417 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 418 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 419 owner = (int *) PetscMalloc( (stash->n+1)*sizeof(int) ); CHKPTRQ(owner); 420 421 for ( i=0; i<stash->n; i++ ) { 422 idx = stash->idx[i]; 423 for ( j=0; j<size; j++ ) { 424 if (idx >= owners[j] && idx < owners[j+1]) { 425 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 426 } 427 } 428 } 429 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 430 431 /* inform other processors of number of messages and max length*/ 432 work = (int *)PetscMalloc(size*sizeof(int)); CHKPTRQ(work); 433 ierr = MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 434 nreceives = work[rank]; 435 ierr = MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 436 nmax = work[rank]; 437 PetscFree(work); 438 /* post receives: 439 since we don't know how long each individual message is we 440 allocate the largest needed buffer for each receive. Potentially 441 this is a lot of wasted space. 442 */ 443 rvalues = (Scalar *)PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(rvalues); 444 rindices = (int *) (rvalues + bs2*nreceives*nmax); 445 recv_waits = (MPI_Request *)PetscMalloc((nreceives+1)*2*sizeof(MPI_Request));CHKPTRQ(recv_waits); 446 for ( i=0,count=0; i<nreceives; i++ ) { 447 ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm, 448 recv_waits+count++); CHKERRQ(ierr); 449 ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm, 450 recv_waits+count++); CHKERRQ(ierr); 451 } 452 453 /* do sends: 454 1) starts[i] gives the starting index in svalues for stuff going to 455 the ith processor 456 */ 457 svalues = (Scalar *)PetscMalloc((stash->n+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(svalues); 458 sindices = (int *) (svalues + bs2*stash->n); 459 send_waits = (MPI_Request *) PetscMalloc(2*(nsends+1)*sizeof(MPI_Request)); 460 CHKPTRQ(send_waits); 461 startv = (int *) PetscMalloc(2*size*sizeof(int) ); CHKPTRQ(startv); 462 starti = startv + size; 463 /* use 2 sends the first with all_a, the next with all_i and all_j */ 464 startv[0] = 0; starti[0] = 0; 465 for ( i=1; i<size; i++ ) { 466 startv[i] = startv[i-1] + nprocs[i-1]; 467 starti[i] = starti[i-1] + nprocs[i-1]*2; 468 } 469 for ( i=0; i<stash->n; i++ ) { 470 j = owner[i]; 471 if (bs2 == 1) { 472 svalues[startv[j]] = stash->array[i]; 473 } else { 474 int k; 475 Scalar *buf1,*buf2; 476 buf1 = svalues+bs2*startv[j]; 477 buf2 = stash->array+bs2*i; 478 for ( k=0; k<bs2; k++ ){ buf1[k] = buf2[k]; } 479 } 480 sindices[starti[j]] = stash->idx[i]; 481 sindices[starti[j]+nprocs[j]] = stash->idy[i]; 482 startv[j]++; 483 starti[j]++; 484 } 485 startv[0] = 0; 486 for ( i=1; i<size; i++ ) { startv[i] = startv[i-1] + nprocs[i-1];} 487 for ( i=0,count=0; i<size; i++ ) { 488 if (procs[i]) { 489 ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[i],MPIU_SCALAR,i,tag1,comm, 490 send_waits+count++);CHKERRQ(ierr); 491 ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[i],MPI_INT,i,tag2,comm, 492 send_waits+count++);CHKERRQ(ierr); 493 } 494 } 495 PetscFree(owner); 496 PetscFree(startv); 497 /* This memory is reused in scatter end for a different purpose*/ 498 for (i=0; i<2*size; i++ ) nprocs[i] = -1; 499 stash->nprocs = nprocs; 500 501 stash->svalues = svalues; stash->rvalues = rvalues; 502 stash->nsends = nsends; stash->nrecvs = nreceives; 503 stash->send_waits = send_waits; stash->recv_waits = recv_waits; 504 stash->rmax = nmax; 505 PetscFunctionReturn(0); 506 } 507 508 /* 509 MatStashScatterGetMesg_Private - This function waits on the receives posted 510 in the function MatStashScatterBegin_Private() and returns one message at 511 a time to the calling function. If no messages are left, it indicates this 512 by setting flg = 0, else it sets flg = 1. 513 514 Input Parameters: 515 stash - the stash 516 517 Output Parameters: 518 nvals - the number of entries in the current message. 519 rows - an array of row indices (or blocked indices) corresponding to the values 520 cols - an array of columnindices (or blocked indices) corresponding to the values 521 vals - the values 522 flg - 0 indicates no more message left, and the current call has no values associated. 523 1 indicates that the current call successfully received a message, and the 524 other output parameters nvals,rows,cols,vals are set appropriately. 525 */ 526 #undef __FUNC__ 527 #define __FUNC__ "MatStashScatterGetMesg_Private" 528 int MatStashScatterGetMesg_Private(MatStash *stash,int *nvals,int **rows,int** cols,Scalar **vals,int *flg) 529 { 530 int i,ierr,size=stash->size,*flg_v,*flg_i; 531 int i1,i2,*rindices,match_found=0,bs2; 532 MPI_Status recv_status; 533 534 PetscFunctionBegin; 535 536 *flg = 0; /* When a message is discovered this is reset to 1 */ 537 /* Return if no more messages to process */ 538 if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); } 539 540 flg_v = stash->nprocs; 541 flg_i = flg_v + size; 542 bs2 = stash->bs*stash->bs; 543 /* If a matching pair of receieves are found, process them, and return the data to 544 the calling function. Until then keep receiving messages */ 545 while (!match_found) { 546 ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 547 /* Now pack the received message into a structure which is useable by others */ 548 if (i % 2) { 549 ierr = MPI_Get_count(&recv_status,MPI_INT,nvals);CHKERRQ(ierr); 550 flg_i[recv_status.MPI_SOURCE] = i/2; 551 *nvals = *nvals/2; /* This message has both row indices and col indices */ 552 } else { 553 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr); 554 flg_v[recv_status.MPI_SOURCE] = i/2; 555 *nvals = *nvals/bs2; 556 } 557 558 /* Check if we have both the messages from this proc */ 559 i1 = flg_v[recv_status.MPI_SOURCE]; 560 i2 = flg_i[recv_status.MPI_SOURCE]; 561 if (i1 != -1 && i2 != -1) { 562 rindices = (int *) (stash->rvalues + bs2*stash->rmax*stash->nrecvs); 563 *rows = rindices + 2*i2*stash->rmax; 564 *cols = *rows + *nvals; 565 *vals = stash->rvalues + i1*bs2*stash->rmax; 566 *flg = 1; 567 stash->nprocessed ++; 568 match_found = 1; 569 } 570 } 571 PetscFunctionReturn(0); 572 } 573