1 2 #include "mpiaij.h" 3 #include "vec/vecimpl.h" 4 #include "inline/spops.h" 5 6 #define CHUNCKSIZE 100 7 /* 8 This is a simple minded stash. Do a linear search to determine if 9 in stash, if not add to end. 10 */ 11 static int StashValues(Stash *stash,int row,int n, int *idxn, 12 Scalar *values,InsertMode addv) 13 { 14 int i,j,N = stash->n,found,*n_idx, *n_idy; 15 Scalar val,*n_array; 16 17 for ( i=0; i<n; i++ ) { 18 found = 0; 19 val = *values++; 20 for ( j=0; j<N; j++ ) { 21 if ( stash->idx[j] == row && stash->idy[j] == idxn[i]) { 22 /* found a match */ 23 if (addv == AddValues) stash->array[j] += val; 24 else stash->array[j] = val; 25 found = 1; 26 break; 27 } 28 } 29 if (!found) { /* not found so add to end */ 30 if ( stash->n == stash->nmax ) { 31 /* allocate a larger stash */ 32 n_array = (Scalar *) MALLOC( (stash->nmax + CHUNCKSIZE)*( 33 2*sizeof(int) + sizeof(Scalar))); 34 CHKPTR(n_array); 35 n_idx = (int *) (n_array + stash->nmax + CHUNCKSIZE); 36 n_idy = (int *) (n_idx + stash->nmax + CHUNCKSIZE); 37 MEMCPY(n_array,stash->array,stash->nmax*sizeof(Scalar)); 38 MEMCPY(n_idx,stash->idx,stash->nmax*sizeof(int)); 39 MEMCPY(n_idy,stash->idy,stash->nmax*sizeof(int)); 40 if (stash->array) FREE(stash->array); 41 stash->array = n_array; stash->idx = n_idx; stash->idy = n_idy; 42 stash->nmax += CHUNCKSIZE; 43 } 44 stash->array[stash->n] = val; 45 stash->idx[stash->n] = row; 46 stash->idy[stash->n++] = idxn[i]; 47 } 48 } 49 return 0; 50 } 51 52 static int MatiAIJInsertValues(Mat mat,int m,int *idxm,int n, 53 int *idxn,Scalar *v,InsertMode addv) 54 { 55 Matimpiaij *aij = (Matimpiaij *) mat->data; 56 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 57 int cstart = aij->cstart, cend = aij->cend,row,col; 58 59 if (aij->insertmode != NotSetValues && aij->insertmode != addv) { 60 SETERR(1,"You cannot mix inserts and adds"); 61 } 62 aij->insertmode = addv; 63 for ( i=0; i<m; i++ ) { 64 if (idxm[i] < 0) continue; 65 if (idxm[i] >= rstart && idxm[i] < rend) { 66 row = idxm[i] - rstart; 67 for ( j=0; j<n; j++ ) { 68 if (idxn[j] < 0) continue; 69 if (idxn[j] >= cstart && idxn[j] < cend){ 70 col = idxn[j] - cstart; 71 ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr); 72 } 73 else { 74 if (aij->assembled) { 75 SETERR(1,"Cannot insert off diagonal block in already\ 76 assembled matrix. Contact petsc-maint@mcs.anl.gov\ 77 if your need this feature"); 78 } 79 col = idxn[j]; 80 ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr); 81 } 82 } 83 } 84 else { 85 ierr = StashValues(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERR(ierr); 86 } 87 } 88 return 0; 89 } 90 91 /* 92 the assembly code is alot like the code for vectors, we should 93 sometime derive a single assembly code that can be used for 94 either case. 95 */ 96 97 static int MatiAIJBeginAssemble(Mat mat) 98 { 99 Matimpiaij *aij = (Matimpiaij *) mat->data; 100 MPI_Comm comm = mat->comm; 101 int ierr, numtids = aij->numtids, *owners = aij->rowners; 102 int mytid = aij->mytid; 103 MPI_Request *send_waits,*recv_waits; 104 int *nprocs,i,j,n,idx,*procs,nsends,nreceives,nmax,*work; 105 int tag = 50, *owner,*starts,count; 106 InsertMode addv; 107 Scalar *rvalues,*svalues; 108 109 /* make sure all processors are either in INSERTMODE or ADDMODE */ 110 MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,numtids,MPI_INT, 111 MPI_BOR,comm); 112 if (addv == (AddValues|InsertValues)) { 113 SETERR(1,"Some processors have inserted while others have added"); 114 } 115 aij->insertmode = addv; /* in case this processor had no cache */ 116 117 /* first count number of contributors to each processor */ 118 nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs); 119 MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids; 120 owner = (int *) MALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTR(owner); 121 for ( i=0; i<aij->stash.n; i++ ) { 122 idx = aij->stash.idx[i]; 123 for ( j=0; j<numtids; j++ ) { 124 if (idx >= owners[j] && idx < owners[j+1]) { 125 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 126 } 127 } 128 } 129 nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 130 131 /* inform other processors of number of messages and max length*/ 132 work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work); 133 MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm); 134 nreceives = work[mytid]; 135 MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm); 136 nmax = work[mytid]; 137 FREE(work); 138 139 /* post receives: 140 1) each message will consist of ordered pairs 141 (global index,value) we store the global index as a double 142 to simplify the message passing. 143 2) since we don't know how long each individual message is we 144 allocate the largest needed buffer for each receive. Potentially 145 this is a lot of wasted space. 146 147 148 This could be done better. 149 */ 150 rvalues = (Scalar *) MALLOC(3*(nreceives+1)*nmax*sizeof(Scalar)); 151 CHKPTR(rvalues); 152 recv_waits = (MPI_Request *) MALLOC((nreceives+1)*sizeof(MPI_Request)); 153 CHKPTR(recv_waits); 154 for ( i=0; i<nreceives; i++ ) { 155 MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPI_SCALAR,MPI_ANY_SOURCE,tag, 156 comm,recv_waits+i); 157 } 158 159 /* do sends: 160 1) starts[i] gives the starting index in svalues for stuff going to 161 the ith processor 162 */ 163 svalues = (Scalar *) MALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) ); 164 CHKPTR(svalues); 165 send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request)); 166 CHKPTR(send_waits); 167 starts = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(starts); 168 starts[0] = 0; 169 for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 170 for ( i=0; i<aij->stash.n; i++ ) { 171 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 172 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 173 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 174 } 175 FREE(owner); 176 starts[0] = 0; 177 for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 178 count = 0; 179 for ( i=0; i<numtids; i++ ) { 180 if (procs[i]) { 181 MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPI_SCALAR,i,tag, 182 comm,send_waits+count++); 183 } 184 } 185 FREE(starts); FREE(nprocs); 186 187 /* Free cache space */ 188 aij->stash.nmax = aij->stash.n = 0; 189 if (aij->stash.array){ FREE(aij->stash.array); aij->stash.array = 0;} 190 191 aij->svalues = svalues; aij->rvalues = rvalues; 192 aij->nsends = nsends; aij->nrecvs = nreceives; 193 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 194 aij->rmax = nmax; 195 196 return 0; 197 } 198 extern int MPIAIJSetUpMultiply(Mat); 199 200 static int MatiAIJEndAssemble(Mat mat) 201 { 202 int ierr; 203 Matimpiaij *aij = (Matimpiaij *) mat->data; 204 205 MPI_Status *send_status,recv_status; 206 int imdex,idx,nrecvs = aij->nrecvs, count = nrecvs, i, n; 207 int row,col; 208 Scalar *values,val; 209 InsertMode addv = aij->insertmode; 210 211 /* wait on receives */ 212 while (count) { 213 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 214 /* unpack receives into our local space */ 215 values = aij->rvalues + 3*imdex*aij->rmax; 216 MPI_Get_count(&recv_status,MPI_SCALAR,&n); 217 n = n/3; 218 for ( i=0; i<n; i++ ) { 219 row = (int) PETSCREAL(values[3*i]) - aij->rstart; 220 col = (int) PETSCREAL(values[3*i+1]); 221 val = values[3*i+2]; 222 if (col >= aij->cstart && col < aij->cend) { 223 col -= aij->cstart; 224 MatSetValues(aij->A,1,&row,1,&col,&val,addv); 225 } 226 else { 227 if (aij->assembled) { 228 SETERR(1,"Cannot insert off diagonal block in already\ 229 assembled matrix. Contact petsc-maint@mcs.anl.gov\ 230 if your need this feature"); 231 } 232 MatSetValues(aij->B,1,&row,1,&col,&val,addv); 233 } 234 } 235 count--; 236 } 237 FREE(aij->recv_waits); FREE(aij->rvalues); 238 239 /* wait on sends */ 240 if (aij->nsends) { 241 send_status = (MPI_Status *) MALLOC( aij->nsends*sizeof(MPI_Status) ); 242 CHKPTR(send_status); 243 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 244 FREE(send_status); 245 } 246 FREE(aij->send_waits); FREE(aij->svalues); 247 248 aij->insertmode = NotSetValues; 249 ierr = MatBeginAssembly(aij->A); CHKERR(ierr); 250 ierr = MatEndAssembly(aij->A); CHKERR(ierr); 251 252 253 if (aij->assembled) { 254 /* this is because the present implimentation doesn't support 255 inserting values off the diagonal block.*/ 256 return 0; 257 } 258 259 ierr = MPIAIJSetUpMultiply(mat); CHKERR(ierr); 260 ierr = MatBeginAssembly(aij->B); CHKERR(ierr); 261 ierr = MatEndAssembly(aij->B); CHKERR(ierr); 262 263 aij->assembled = 1; 264 return 0; 265 } 266 267 static int MatiZero(Mat A) 268 { 269 Matimpiaij *l = (Matimpiaij *) A->data; 270 271 MatZeroEntries(l->A); MatZeroEntries(l->B); 272 return 0; 273 } 274 275 /* again this uses the same basic stratagy as in the assembly and 276 scatter create routines, we should try to do it systemamatically 277 if we can figure out the proper level of generality. */ 278 279 /* the code does not do the diagonal entries correctly unless the 280 matrix is square and the column and row owerships are identical. 281 This is a BUG. The only way to fix it seems to be to access 282 aij->A and aij->B directly and not through the MatZeroRows() 283 routine. 284 */ 285 static int MatiZerorows(Mat A,IS is,Scalar *diag) 286 { 287 Matimpiaij *l = (Matimpiaij *) A->data; 288 int i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids; 289 int *localrows,*procs,*nprocs,j,found,idx,nsends,*work; 290 int nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid; 291 int *rvalues,tag = 67,count,base,slen,n,len,*source; 292 int *lens,imdex,*lrows,*values; 293 MPI_Comm comm = A->comm; 294 MPI_Request *send_waits,*recv_waits; 295 MPI_Status recv_status,*send_status; 296 IS istmp; 297 298 ierr = ISGetLocalSize(is,&N); CHKERR(ierr); 299 ierr = ISGetIndices(is,&rows); CHKERR(ierr); 300 301 /* first count number of contributors to each processor */ 302 nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs); 303 MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids; 304 owner = (int *) MALLOC((N+1)*sizeof(int)); CHKPTR(owner); /* see note*/ 305 for ( i=0; i<N; i++ ) { 306 idx = rows[i]; 307 found = 0; 308 for ( j=0; j<numtids; j++ ) { 309 if (idx >= owners[j] && idx < owners[j+1]) { 310 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 311 } 312 } 313 if (!found) SETERR(1,"Imdex out of range"); 314 } 315 nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 316 317 /* inform other processors of number of messages and max length*/ 318 work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work); 319 MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm); 320 nrecvs = work[mytid]; 321 MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm); 322 nmax = work[mytid]; 323 FREE(work); 324 325 /* post receives: */ 326 rvalues = (int *) MALLOC((nrecvs+1)*nmax*sizeof(int)); /*see note */ 327 CHKPTR(rvalues); 328 recv_waits = (MPI_Request *) MALLOC((nrecvs+1)*sizeof(MPI_Request)); 329 CHKPTR(recv_waits); 330 for ( i=0; i<nrecvs; i++ ) { 331 MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag, 332 comm,recv_waits+i); 333 } 334 335 /* do sends: 336 1) starts[i] gives the starting index in svalues for stuff going to 337 the ith processor 338 */ 339 svalues = (int *) MALLOC( (N+1)*sizeof(int) ); CHKPTR(svalues); 340 send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request)); 341 CHKPTR(send_waits); 342 starts = (int *) MALLOC( (numtids+1)*sizeof(int) ); CHKPTR(starts); 343 starts[0] = 0; 344 for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 345 for ( i=0; i<N; i++ ) { 346 svalues[starts[owner[i]]++] = rows[i]; 347 } 348 ISRestoreIndices(is,&rows); 349 350 starts[0] = 0; 351 for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 352 count = 0; 353 for ( i=0; i<numtids; i++ ) { 354 if (procs[i]) { 355 MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag, 356 comm,send_waits+count++); 357 } 358 } 359 FREE(starts); 360 361 base = owners[mytid]; 362 363 /* wait on receives */ 364 lens = (int *) MALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTR(lens); 365 source = lens + nrecvs; 366 count = nrecvs; slen = 0; 367 while (count) { 368 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 369 /* unpack receives into our local space */ 370 MPI_Get_count(&recv_status,MPI_INT,&n); 371 source[imdex] = recv_status.MPI_SOURCE; 372 lens[imdex] = n; 373 slen += n; 374 count--; 375 } 376 FREE(recv_waits); 377 378 /* move the data into the send scatter */ 379 lrows = (int *) MALLOC( slen*sizeof(int) ); CHKPTR(lrows); 380 count = 0; 381 for ( i=0; i<nrecvs; i++ ) { 382 values = rvalues + i*nmax; 383 for ( j=0; j<lens[i]; j++ ) { 384 lrows[count++] = values[j] - base; 385 } 386 } 387 FREE(rvalues); FREE(lens); 388 FREE(owner); FREE(nprocs); 389 390 /* actually zap the local rows */ 391 ierr = ISCreateSequential(slen,lrows,&istmp); CHKERR(ierr); FREE(lrows); 392 ierr = MatZeroRows(l->A,istmp,diag); CHKERR(ierr); 393 ierr = MatZeroRows(l->B,istmp,0); CHKERR(ierr); 394 ierr = ISDestroy(istmp); CHKERR(ierr); 395 396 /* wait on sends */ 397 if (nsends) { 398 send_status = (MPI_Status *) MALLOC( nsends*sizeof(MPI_Status) ); 399 CHKPTR(send_status); 400 MPI_Waitall(nsends,send_waits,send_status); 401 FREE(send_status); 402 } 403 FREE(send_waits); FREE(svalues); 404 405 406 return 0; 407 } 408 409 static int MatiAIJMult(Mat aijin,Vec xx,Vec yy) 410 { 411 Matimpiaij *aij = (Matimpiaij *) aijin->data; 412 int ierr; 413 414 ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx); 415 CHKERR(ierr); 416 ierr = MatMult(aij->A,xx,yy); CHKERR(ierr); 417 ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx); 418 CHKERR(ierr); 419 ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERR(ierr); 420 return 0; 421 } 422 423 /* 424 This only works correctly for square matrices where the subblock A->A is the 425 diagonal block 426 */ 427 static int MatiAIJgetdiag(Mat Ain,Vec v) 428 { 429 Matimpiaij *A = (Matimpiaij *) Ain->data; 430 return MatGetDiagonal(A->A,v); 431 } 432 433 static int MatiAIJdestroy(PetscObject obj) 434 { 435 Mat mat = (Mat) obj; 436 Matimpiaij *aij = (Matimpiaij *) mat->data; 437 int ierr; 438 FREE(aij->rowners); 439 ierr = MatDestroy(aij->A); CHKERR(ierr); 440 ierr = MatDestroy(aij->B); CHKERR(ierr); 441 FREE(aij); FREE(mat); 442 if (aij->lvec) VecDestroy(aij->lvec); 443 if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx); 444 return 0; 445 } 446 447 static int MatiView(PetscObject obj,Viewer viewer) 448 { 449 Mat mat = (Mat) obj; 450 Matimpiaij *aij = (Matimpiaij *) mat->data; 451 int ierr; 452 453 MPE_Seq_begin(mat->comm,1); 454 printf("[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 455 aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 456 aij->cend); 457 ierr = MatView(aij->A,viewer); CHKERR(ierr); 458 ierr = MatView(aij->B,viewer); CHKERR(ierr); 459 MPE_Seq_end(mat->comm,1); 460 return 0; 461 } 462 463 extern int MatiAIJmarkdiag(Matiaij *); 464 /* 465 This has to provide several versions. 466 467 1) per sequential 468 2) a) use only local smoothing updating outer values only once. 469 b) local smoothing updating outer values each inner iteration 470 3) color updating out values betwen colors. 471 */ 472 static int MatiAIJrelax(Mat matin,Vec bb,double omega,int flag,double shift, 473 int its,Vec xx) 474 { 475 Matimpiaij *mat = (Matimpiaij *) matin->data; 476 Mat AA = mat->A, BB = mat->B; 477 Matiaij *A = (Matiaij *) AA->data, *B = (Matiaij *)BB->data; 478 Scalar zero = 0.0,*b,*x,*w,*xs,*ls,d,*v,sum; 479 int ierr,one = 1, tmp, *idx, *diag,mytid; 480 int n = mat->n, m = mat->m, i, j; 481 482 if (flag & SOR_ZERO_INITIAL_GUESS) { 483 /* could be more efficient by incorporating with below */ 484 if (ierr = VecSet(&zero,xx)) return ierr; 485 } 486 487 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 488 xs = x - 1; /* shift by one for index start of 1 */ 489 ls--;; 490 if (!A->diag) {if (ierr = MatiAIJmarkdiag(A)) return ierr;} 491 diag = A->diag; 492 493 if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 494 ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx); 495 CHKERR(ierr); 496 ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx); 497 CHKERR(ierr); 498 while (its--) { 499 /* go down through the rows */ 500 ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown, 501 mat->Mvctx); CHKERR(ierr); 502 for ( i=0; i<m; i++ ) { 503 n = A->i[i+1] - A->i[i]; 504 idx = A->j + A->i[i] - 1; 505 v = A->a + A->i[i] - 1; 506 sum = b[i]; 507 SPARSEDENSEMDOT(sum,xs,v,idx,n); 508 d = shift + A->a[diag[i]-1]; 509 n = B->i[i+1] - B->i[i]; 510 idx = B->j + B->i[i] - 1; 511 v = B->a + B->i[i] - 1; 512 SPARSEDENSEMDOT(sum,ls,v,idx,n); 513 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 514 } 515 ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown, 516 mat->Mvctx); CHKERR(ierr); 517 /* come up through the rows */ 518 ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp, 519 mat->Mvctx); CHKERR(ierr); 520 for ( i=m-1; i>-1; i-- ) { 521 n = A->i[i+1] - A->i[i]; 522 idx = A->j + A->i[i] - 1; 523 v = A->a + A->i[i] - 1; 524 sum = b[i]; 525 SPARSEDENSEMDOT(sum,xs,v,idx,n); 526 d = shift + A->a[diag[i]-1]; 527 n = B->i[i+1] - B->i[i]; 528 idx = B->j + B->i[i] - 1; 529 v = B->a + B->i[i] - 1; 530 SPARSEDENSEMDOT(sum,ls,v,idx,n); 531 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 532 } 533 ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp, 534 mat->Mvctx); CHKERR(ierr); 535 } 536 } 537 else if (flag & SOR_FORWARD_SWEEP){ 538 while (its--) { 539 ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx); 540 CHKERR(ierr); 541 ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx); 542 CHKERR(ierr); 543 ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown, 544 mat->Mvctx); CHKERR(ierr); 545 for ( i=0; i<m; i++ ) { 546 n = A->i[i+1] - A->i[i]; 547 idx = A->j + A->i[i] - 1; 548 v = A->a + A->i[i] - 1; 549 sum = b[i]; 550 SPARSEDENSEMDOT(sum,xs,v,idx,n); 551 d = shift + A->a[diag[i]-1]; 552 n = B->i[i+1] - B->i[i]; 553 idx = B->j + B->i[i] - 1; 554 v = B->a + B->i[i] - 1; 555 SPARSEDENSEMDOT(sum,ls,v,idx,n); 556 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 557 } 558 ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown, 559 mat->Mvctx); CHKERR(ierr); 560 } 561 } 562 else if (flag & SOR_BACKWARD_SWEEP){ 563 while (its--) { 564 ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterDown, 565 mat->Mvctx); CHKERR(ierr); 566 ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterDown, 567 mat->Mvctx); CHKERR(ierr); 568 ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp, 569 mat->Mvctx); CHKERR(ierr); 570 for ( i=m-1; i>-1; i-- ) { 571 n = A->i[i+1] - A->i[i]; 572 idx = A->j + A->i[i] - 1; 573 v = A->a + A->i[i] - 1; 574 sum = b[i]; 575 SPARSEDENSEMDOT(sum,xs,v,idx,n); 576 d = shift + A->a[diag[i]-1]; 577 n = B->i[i+1] - B->i[i]; 578 idx = B->j + B->i[i] - 1; 579 v = B->a + B->i[i] - 1; 580 SPARSEDENSEMDOT(sum,ls,v,idx,n); 581 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 582 } 583 ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp, 584 mat->Mvctx); CHKERR(ierr); 585 } 586 } 587 else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 588 ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx); 589 CHKERR(ierr); 590 ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx); 591 CHKERR(ierr); 592 while (its--) { 593 /* go down through the rows */ 594 for ( i=0; i<m; i++ ) { 595 n = A->i[i+1] - A->i[i]; 596 idx = A->j + A->i[i] - 1; 597 v = A->a + A->i[i] - 1; 598 sum = b[i]; 599 SPARSEDENSEMDOT(sum,xs,v,idx,n); 600 d = shift + A->a[diag[i]-1]; 601 n = B->i[i+1] - B->i[i]; 602 idx = B->j + B->i[i] - 1; 603 v = B->a + B->i[i] - 1; 604 SPARSEDENSEMDOT(sum,ls,v,idx,n); 605 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 606 } 607 /* come up through the rows */ 608 for ( i=m-1; i>-1; i-- ) { 609 n = A->i[i+1] - A->i[i]; 610 idx = A->j + A->i[i] - 1; 611 v = A->a + A->i[i] - 1; 612 sum = b[i]; 613 SPARSEDENSEMDOT(sum,xs,v,idx,n); 614 d = shift + A->a[diag[i]-1]; 615 n = B->i[i+1] - B->i[i]; 616 idx = B->j + B->i[i] - 1; 617 v = B->a + B->i[i] - 1; 618 SPARSEDENSEMDOT(sum,ls,v,idx,n); 619 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 620 } 621 } 622 } 623 else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 624 ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx); 625 CHKERR(ierr); 626 ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx); 627 CHKERR(ierr); 628 while (its--) { 629 for ( i=0; i<m; i++ ) { 630 n = A->i[i+1] - A->i[i]; 631 idx = A->j + A->i[i] - 1; 632 v = A->a + A->i[i] - 1; 633 sum = b[i]; 634 SPARSEDENSEMDOT(sum,xs,v,idx,n); 635 d = shift + A->a[diag[i]-1]; 636 n = B->i[i+1] - B->i[i]; 637 idx = B->j + B->i[i] - 1; 638 v = B->a + B->i[i] - 1; 639 SPARSEDENSEMDOT(sum,ls,v,idx,n); 640 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 641 } 642 } 643 } 644 else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 645 ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll, 646 mat->Mvctx); CHKERR(ierr); 647 ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll, 648 mat->Mvctx); CHKERR(ierr); 649 while (its--) { 650 for ( i=m-1; i>-1; i-- ) { 651 n = A->i[i+1] - A->i[i]; 652 idx = A->j + A->i[i] - 1; 653 v = A->a + A->i[i] - 1; 654 sum = b[i]; 655 SPARSEDENSEMDOT(sum,xs,v,idx,n); 656 d = shift + A->a[diag[i]-1]; 657 n = B->i[i+1] - B->i[i]; 658 idx = B->j + B->i[i] - 1; 659 v = B->a + B->i[i] - 1; 660 SPARSEDENSEMDOT(sum,ls,v,idx,n); 661 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 662 } 663 } 664 } 665 return 0; 666 } 667 static int MatiAIJinsopt(Mat aijin,int op) 668 { 669 Matimpiaij *aij = (Matimpiaij *) aijin->data; 670 671 if (op == NO_NEW_NONZERO_LOCATIONS) { 672 MatSetOption(aij->A,op); 673 MatSetOption(aij->B,op); 674 } 675 else if (op == YES_NEW_NONZERO_LOCATIONS) { 676 MatSetOption(aij->A,op); 677 MatSetOption(aij->B,op); 678 } 679 else if (op == COLUMN_ORIENTED) SETERR(1,"Column oriented not supported"); 680 return 0; 681 } 682 683 static int MatiAIJsize(Mat matin,int *m,int *n) 684 { 685 Matimpiaij *mat = (Matimpiaij *) matin->data; 686 *m = mat->M; *n = mat->N; 687 return 0; 688 } 689 690 static int MatiAIJlocalsize(Mat matin,int *m,int *n) 691 { 692 Matimpiaij *mat = (Matimpiaij *) matin->data; 693 *m = mat->m; *n = mat->n; 694 return 0; 695 } 696 697 static int MatiAIJrange(Mat matin,int *m,int *n) 698 { 699 Matimpiaij *mat = (Matimpiaij *) matin->data; 700 *m = mat->rstart; *n = mat->rend; 701 return 0; 702 } 703 704 static int MatiCopy(Mat,Mat *); 705 706 /* -------------------------------------------------------------------*/ 707 static struct _MatOps MatOps = {MatiAIJInsertValues, 708 0, 0, 709 MatiAIJMult,0,0,0, 710 0,0,0,0, 711 0,0, 712 MatiAIJrelax, 713 0, 714 0,0,0, 715 MatiCopy, 716 MatiAIJgetdiag,0,0, 717 MatiAIJBeginAssemble,MatiAIJEndAssemble, 718 0, 719 MatiAIJinsopt,MatiZero,MatiZerorows,0, 720 0,0,0,0, 721 MatiAIJsize,MatiAIJlocalsize,MatiAIJrange }; 722 723 724 725 /*@ 726 727 MatCreateMPIAIJ - Creates a sparse parallel matrix 728 in AIJ format. 729 730 Input Parameters: 731 . comm - MPI communicator 732 . m,n - number of local rows and columns (or -1 to have calculated) 733 . M,N - global rows and columns (or -1 to have calculated) 734 . d_nz - total number nonzeros in diagonal portion of matrix 735 . d_nzz - number of nonzeros per row in diagonal portion of matrix or null 736 . You must leave room for the diagonal entry even if it is zero. 737 . o_nz - total number nonzeros in off-diagonal portion of matrix 738 . o_nzz - number of nonzeros per row in off-diagonal portion of matrix 739 . or null. You must have at least one nonzero per row. 740 741 Output parameters: 742 . newmat - the matrix 743 744 Keywords: matrix, aij, compressed row, sparse, parallel 745 @*/ 746 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 747 int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat) 748 { 749 Mat mat; 750 Matimpiaij *aij; 751 int ierr, i,rl,len,sum[2],work[2]; 752 *newmat = 0; 753 CREATEHEADER(mat,_Mat); 754 mat->data = (void *) (aij = NEW(Matimpiaij)); CHKPTR(aij); 755 mat->cookie = MAT_COOKIE; 756 mat->ops = &MatOps; 757 mat->destroy = MatiAIJdestroy; 758 mat->view = MatiView; 759 mat->type = MATAIJMPI; 760 mat->factor = 0; 761 mat->row = 0; 762 mat->col = 0; 763 mat->outofrange = 0; 764 mat->Mlow = 0; 765 mat->Mhigh = M; 766 mat->Nlow = 0; 767 mat->Nhigh = N; 768 769 770 mat->comm = comm; 771 aij->insertmode = NotSetValues; 772 MPI_Comm_rank(comm,&aij->mytid); 773 MPI_Comm_size(comm,&aij->numtids); 774 775 if (M == -1 || N == -1) { 776 work[0] = m; work[1] = n; 777 MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm ); 778 if (M == -1) M = sum[0]; 779 if (N == -1) N = sum[1]; 780 } 781 if (m == -1) {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);} 782 if (n == -1) {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);} 783 aij->m = m; 784 aij->n = n; 785 aij->N = N; 786 aij->M = M; 787 788 /* build local table of row and column ownerships */ 789 aij->rowners = (int *) MALLOC(2*(aij->numtids+2)*sizeof(int)); 790 CHKPTR(aij->rowners); 791 aij->cowners = aij->rowners + aij->numtids + 1; 792 MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm); 793 aij->rowners[0] = 0; 794 for ( i=2; i<=aij->numtids; i++ ) { 795 aij->rowners[i] += aij->rowners[i-1]; 796 } 797 aij->rstart = aij->rowners[aij->mytid]; 798 aij->rend = aij->rowners[aij->mytid+1]; 799 MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm); 800 aij->cowners[0] = 0; 801 for ( i=2; i<=aij->numtids; i++ ) { 802 aij->cowners[i] += aij->cowners[i-1]; 803 } 804 aij->cstart = aij->cowners[aij->mytid]; 805 aij->cend = aij->cowners[aij->mytid+1]; 806 807 808 ierr = MatCreateSequentialAIJ(m,n,d_nz,d_nnz,&aij->A); CHKERR(ierr); 809 ierr = MatCreateSequentialAIJ(m,N,o_nz,o_nnz,&aij->B); CHKERR(ierr); 810 811 /* build cache for off array entries formed */ 812 aij->stash.nmax = CHUNCKSIZE; /* completely arbratray number */ 813 aij->stash.n = 0; 814 aij->stash.array = (Scalar *) MALLOC( aij->stash.nmax*(2*sizeof(int) + 815 sizeof(Scalar))); CHKPTR(aij->stash.array); 816 aij->stash.idx = (int *) (aij->stash.array + aij->stash.nmax); 817 aij->stash.idy = (int *) (aij->stash.idx + aij->stash.nmax); 818 819 /* stuff used for matrix vector multiply */ 820 aij->lvec = 0; 821 aij->Mvctx = 0; 822 aij->assembled = 0; 823 824 *newmat = mat; 825 return 0; 826 } 827 828 static int MatiCopy(Mat matin,Mat *newmat) 829 { 830 Mat mat; 831 Matimpiaij *aij,*oldmat = (Matimpiaij *) matin->data; 832 int i,rl,len, m = oldmat->m, n = oldmat->n,ierr; 833 *newmat = 0; 834 835 if (!oldmat->assembled) SETERR(1,"Cannot copy unassembled matrix"); 836 CREATEHEADER(mat,_Mat); 837 mat->data = (void *) (aij = NEW(Matimpiaij)); CHKPTR(aij); 838 mat->cookie = MAT_COOKIE; 839 mat->ops = &MatOps; 840 mat->destroy = MatiAIJdestroy; 841 mat->view = MatiView; 842 mat->type = MATAIJSEQ; 843 mat->factor = matin->factor; 844 mat->row = 0; 845 mat->col = 0; 846 mat->outofrange = matin->outofrange; 847 mat->Mlow = matin->Mlow; 848 mat->Mhigh = matin->Mhigh; 849 mat->Nlow = matin->Nlow; 850 mat->Nhigh = matin->Nhigh; 851 852 aij->m = oldmat->m; 853 aij->n = oldmat->n; 854 aij->M = oldmat->M; 855 aij->N = oldmat->N; 856 857 aij->assembled = 1; 858 aij->rstart = oldmat->rstart; 859 aij->rend = oldmat->rend; 860 aij->cstart = oldmat->cstart; 861 aij->cend = oldmat->cend; 862 aij->numtids = oldmat->numtids; 863 aij->mytid = oldmat->mytid; 864 aij->insertmode = NotSetValues; 865 866 aij->rowners = (int *) MALLOC( (aij->numtids+1)*sizeof(int) ); 867 CHKPTR(aij->rowners); 868 MEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int)); 869 aij->stash.nmax = 0; 870 aij->stash.n = 0; 871 aij->stash.array= 0; 872 mat->comm = matin->comm; 873 874 ierr = VecCreate(oldmat->lvec,&aij->lvec); CHKERR(ierr); 875 ierr = VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERR(ierr); 876 ierr = MatCopy(oldmat->A,&aij->A); CHKERR(ierr); 877 ierr = MatCopy(oldmat->B,&aij->B); CHKERR(ierr); 878 *newmat = mat; 879 return 0; 880 } 881