1 #ifndef lint 2 static char vcid[] = "$Id: mpiaij.c,v 1.7 1995/03/06 04:04:51 bsmith Exp bsmith $"; 3 #endif 4 5 #include "mpiaij.h" 6 #include "vec/vecimpl.h" 7 #include "inline/spops.h" 8 9 #define CHUNCKSIZE 100 10 /* 11 This is a simple minded stash. Do a linear search to determine if 12 in stash, if not add to end. 13 */ 14 static int StashValues(Stash *stash,int row,int n, int *idxn, 15 Scalar *values,InsertMode addv) 16 { 17 int i,j,N = stash->n,found,*n_idx, *n_idy; 18 Scalar val,*n_array; 19 20 for ( i=0; i<n; i++ ) { 21 found = 0; 22 val = *values++; 23 for ( j=0; j<N; j++ ) { 24 if ( stash->idx[j] == row && stash->idy[j] == idxn[i]) { 25 /* found a match */ 26 if (addv == AddValues) stash->array[j] += val; 27 else stash->array[j] = val; 28 found = 1; 29 break; 30 } 31 } 32 if (!found) { /* not found so add to end */ 33 if ( stash->n == stash->nmax ) { 34 /* allocate a larger stash */ 35 n_array = (Scalar *) MALLOC( (stash->nmax + CHUNCKSIZE)*( 36 2*sizeof(int) + sizeof(Scalar))); 37 CHKPTR(n_array); 38 n_idx = (int *) (n_array + stash->nmax + CHUNCKSIZE); 39 n_idy = (int *) (n_idx + stash->nmax + CHUNCKSIZE); 40 MEMCPY(n_array,stash->array,stash->nmax*sizeof(Scalar)); 41 MEMCPY(n_idx,stash->idx,stash->nmax*sizeof(int)); 42 MEMCPY(n_idy,stash->idy,stash->nmax*sizeof(int)); 43 if (stash->array) FREE(stash->array); 44 stash->array = n_array; stash->idx = n_idx; stash->idy = n_idy; 45 stash->nmax += CHUNCKSIZE; 46 } 47 stash->array[stash->n] = val; 48 stash->idx[stash->n] = row; 49 stash->idy[stash->n++] = idxn[i]; 50 } 51 } 52 return 0; 53 } 54 55 static int MatiAIJInsertValues(Mat mat,int m,int *idxm,int n, 56 int *idxn,Scalar *v,InsertMode addv) 57 { 58 Matimpiaij *aij = (Matimpiaij *) mat->data; 59 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 60 int cstart = aij->cstart, cend = aij->cend,row,col; 61 62 if (aij->insertmode != NotSetValues && aij->insertmode != addv) { 63 SETERR(1,"You cannot mix inserts and adds"); 64 } 65 aij->insertmode = addv; 66 for ( i=0; i<m; i++ ) { 67 if (idxm[i] < 0) SETERR(1,"Negative row index"); 68 if (idxm[i] >= aij->M) SETERR(1,"Row index too large"); 69 if (idxm[i] >= rstart && idxm[i] < rend) { 70 row = idxm[i] - rstart; 71 for ( j=0; j<n; j++ ) { 72 if (idxn[j] < 0) SETERR(1,"Negative column index"); 73 if (idxn[j] >= aij->N) SETERR(1,"Column index too large"); 74 if (idxn[j] >= cstart && idxn[j] < cend){ 75 col = idxn[j] - cstart; 76 ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr); 77 } 78 else { 79 if (aij->assembled) { 80 SETERR(1,"Cannot insert off diagonal block in already\ 81 assembled matrix. Contact petsc-maint@mcs.anl.gov\ 82 if your need this feature"); 83 } 84 col = idxn[j]; 85 ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr); 86 } 87 } 88 } 89 else { 90 ierr = StashValues(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERR(ierr); 91 } 92 } 93 return 0; 94 } 95 96 /* 97 the assembly code is alot like the code for vectors, we should 98 sometime derive a single assembly code that can be used for 99 either case. 100 */ 101 102 static int MatiAIJBeginAssemble(Mat mat) 103 { 104 Matimpiaij *aij = (Matimpiaij *) mat->data; 105 MPI_Comm comm = mat->comm; 106 int numtids = aij->numtids, *owners = aij->rowners; 107 int mytid = aij->mytid; 108 MPI_Request *send_waits,*recv_waits; 109 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 110 int tag = 50, *owner,*starts,count; 111 InsertMode addv; 112 Scalar *rvalues,*svalues; 113 114 /* make sure all processors are either in INSERTMODE or ADDMODE */ 115 MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT, 116 MPI_BOR,comm); 117 if (addv == (AddValues|InsertValues)) { 118 SETERR(1,"Some processors have inserted while others have added"); 119 } 120 aij->insertmode = addv; /* in case this processor had no cache */ 121 122 /* first count number of contributors to each processor */ 123 nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs); 124 MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids; 125 owner = (int *) MALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTR(owner); 126 for ( i=0; i<aij->stash.n; i++ ) { 127 idx = aij->stash.idx[i]; 128 for ( j=0; j<numtids; j++ ) { 129 if (idx >= owners[j] && idx < owners[j+1]) { 130 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 131 } 132 } 133 } 134 nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 135 136 /* inform other processors of number of messages and max length*/ 137 work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work); 138 MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm); 139 nreceives = work[mytid]; 140 MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm); 141 nmax = work[mytid]; 142 FREE(work); 143 144 /* post receives: 145 1) each message will consist of ordered pairs 146 (global index,value) we store the global index as a double 147 to simplify the message passing. 148 2) since we don't know how long each individual message is we 149 allocate the largest needed buffer for each receive. Potentially 150 this is a lot of wasted space. 151 152 153 This could be done better. 154 */ 155 rvalues = (Scalar *) MALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 156 CHKPTR(rvalues); 157 recv_waits = (MPI_Request *) MALLOC((nreceives+1)*sizeof(MPI_Request)); 158 CHKPTR(recv_waits); 159 for ( i=0; i<nreceives; i++ ) { 160 MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPI_SCALAR,MPI_ANY_SOURCE,tag, 161 comm,recv_waits+i); 162 } 163 164 /* do sends: 165 1) starts[i] gives the starting index in svalues for stuff going to 166 the ith processor 167 */ 168 svalues = (Scalar *) MALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) ); 169 CHKPTR(svalues); 170 send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request)); 171 CHKPTR(send_waits); 172 starts = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(starts); 173 starts[0] = 0; 174 for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 175 for ( i=0; i<aij->stash.n; i++ ) { 176 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 177 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 178 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 179 } 180 FREE(owner); 181 starts[0] = 0; 182 for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 183 count = 0; 184 for ( i=0; i<numtids; i++ ) { 185 if (procs[i]) { 186 MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPI_SCALAR,i,tag, 187 comm,send_waits+count++); 188 } 189 } 190 FREE(starts); FREE(nprocs); 191 192 /* Free cache space */ 193 aij->stash.nmax = aij->stash.n = 0; 194 if (aij->stash.array){ FREE(aij->stash.array); aij->stash.array = 0;} 195 196 aij->svalues = svalues; aij->rvalues = rvalues; 197 aij->nsends = nsends; aij->nrecvs = nreceives; 198 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 199 aij->rmax = nmax; 200 201 return 0; 202 } 203 extern int MPIAIJSetUpMultiply(Mat); 204 205 static int MatiAIJEndAssemble(Mat mat) 206 { 207 int ierr; 208 Matimpiaij *aij = (Matimpiaij *) mat->data; 209 210 MPI_Status *send_status,recv_status; 211 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n; 212 int row,col; 213 Scalar *values,val; 214 InsertMode addv = aij->insertmode; 215 216 /* wait on receives */ 217 while (count) { 218 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 219 /* unpack receives into our local space */ 220 values = aij->rvalues + 3*imdex*aij->rmax; 221 MPI_Get_count(&recv_status,MPI_SCALAR,&n); 222 n = n/3; 223 for ( i=0; i<n; i++ ) { 224 row = (int) PETSCREAL(values[3*i]) - aij->rstart; 225 col = (int) PETSCREAL(values[3*i+1]); 226 val = values[3*i+2]; 227 if (col >= aij->cstart && col < aij->cend) { 228 col -= aij->cstart; 229 MatSetValues(aij->A,1,&row,1,&col,&val,addv); 230 } 231 else { 232 if (aij->assembled) { 233 SETERR(1,"Cannot insert off diagonal block in already\ 234 assembled matrix. Contact petsc-maint@mcs.anl.gov\ 235 if your need this feature"); 236 } 237 MatSetValues(aij->B,1,&row,1,&col,&val,addv); 238 } 239 } 240 count--; 241 } 242 FREE(aij->recv_waits); FREE(aij->rvalues); 243 244 /* wait on sends */ 245 if (aij->nsends) { 246 send_status = (MPI_Status *) MALLOC( aij->nsends*sizeof(MPI_Status) ); 247 CHKPTR(send_status); 248 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 249 FREE(send_status); 250 } 251 FREE(aij->send_waits); FREE(aij->svalues); 252 253 aij->insertmode = NotSetValues; 254 ierr = MatBeginAssembly(aij->A); CHKERR(ierr); 255 ierr = MatEndAssembly(aij->A); CHKERR(ierr); 256 257 258 if (aij->assembled) { 259 /* this is because the present implimentation doesn't support 260 inserting values off the diagonal block.*/ 261 return 0; 262 } 263 264 ierr = MPIAIJSetUpMultiply(mat); CHKERR(ierr); 265 ierr = MatBeginAssembly(aij->B); CHKERR(ierr); 266 ierr = MatEndAssembly(aij->B); CHKERR(ierr); 267 268 aij->assembled = 1; 269 return 0; 270 } 271 272 static int MatiZero(Mat A) 273 { 274 Matimpiaij *l = (Matimpiaij *) A->data; 275 276 MatZeroEntries(l->A); MatZeroEntries(l->B); 277 return 0; 278 } 279 280 /* again this uses the same basic stratagy as in the assembly and 281 scatter create routines, we should try to do it systemamatically 282 if we can figure out the proper level of generality. */ 283 284 /* the code does not do the diagonal entries correctly unless the 285 matrix is square and the column and row owerships are identical. 286 This is a BUG. The only way to fix it seems to be to access 287 aij->A and aij->B directly and not through the MatZeroRows() 288 routine. 289 */ 290 static int MatiZerorows(Mat A,IS is,Scalar *diag) 291 { 292 Matimpiaij *l = (Matimpiaij *) A->data; 293 int i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids; 294 int *procs,*nprocs,j,found,idx,nsends,*work; 295 int nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid; 296 int *rvalues,tag = 67,count,base,slen,n,*source; 297 int *lens,imdex,*lrows,*values; 298 MPI_Comm comm = A->comm; 299 MPI_Request *send_waits,*recv_waits; 300 MPI_Status recv_status,*send_status; 301 IS istmp; 302 303 if (!l->assembled) SETERR(1,"MatiZerorows: must assmble matrix first"); 304 ierr = ISGetLocalSize(is,&N); CHKERR(ierr); 305 ierr = ISGetIndices(is,&rows); CHKERR(ierr); 306 307 /* first count number of contributors to each processor */ 308 nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs); 309 MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids; 310 owner = (int *) MALLOC((N+1)*sizeof(int)); CHKPTR(owner); /* see note*/ 311 for ( i=0; i<N; i++ ) { 312 idx = rows[i]; 313 found = 0; 314 for ( j=0; j<numtids; j++ ) { 315 if (idx >= owners[j] && idx < owners[j+1]) { 316 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 317 } 318 } 319 if (!found) SETERR(1,"Imdex out of range"); 320 } 321 nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 322 323 /* inform other processors of number of messages and max length*/ 324 work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work); 325 MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm); 326 nrecvs = work[mytid]; 327 MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm); 328 nmax = work[mytid]; 329 FREE(work); 330 331 /* post receives: */ 332 rvalues = (int *) MALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 333 CHKPTR(rvalues); 334 recv_waits = (MPI_Request *) MALLOC((nrecvs+1)*sizeof(MPI_Request)); 335 CHKPTR(recv_waits); 336 for ( i=0; i<nrecvs; i++ ) { 337 MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag, 338 comm,recv_waits+i); 339 } 340 341 /* do sends: 342 1) starts[i] gives the starting index in svalues for stuff going to 343 the ith processor 344 */ 345 svalues = (int *) MALLOC( (N+1)*sizeof(int) ); CHKPTR(svalues); 346 send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request)); 347 CHKPTR(send_waits); 348 starts = (int *) MALLOC( (numtids+1)*sizeof(int) ); CHKPTR(starts); 349 starts[0] = 0; 350 for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 351 for ( i=0; i<N; i++ ) { 352 svalues[starts[owner[i]]++] = rows[i]; 353 } 354 ISRestoreIndices(is,&rows); 355 356 starts[0] = 0; 357 for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 358 count = 0; 359 for ( i=0; i<numtids; i++ ) { 360 if (procs[i]) { 361 MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag, 362 comm,send_waits+count++); 363 } 364 } 365 FREE(starts); 366 367 base = owners[mytid]; 368 369 /* wait on receives */ 370 lens = (int *) MALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTR(lens); 371 source = lens + nrecvs; 372 count = nrecvs; slen = 0; 373 while (count) { 374 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 375 /* unpack receives into our local space */ 376 MPI_Get_count(&recv_status,MPI_INT,&n); 377 source[imdex] = recv_status.MPI_SOURCE; 378 lens[imdex] = n; 379 slen += n; 380 count--; 381 } 382 FREE(recv_waits); 383 384 /* move the data into the send scatter */ 385 lrows = (int *) MALLOC( slen*sizeof(int) ); CHKPTR(lrows); 386 count = 0; 387 for ( i=0; i<nrecvs; i++ ) { 388 values = rvalues + i*nmax; 389 for ( j=0; j<lens[i]; j++ ) { 390 lrows[count++] = values[j] - base; 391 } 392 } 393 FREE(rvalues); FREE(lens); 394 FREE(owner); FREE(nprocs); 395 396 /* actually zap the local rows */ 397 ierr = ISCreateSequential(slen,lrows,&istmp); CHKERR(ierr); FREE(lrows); 398 ierr = MatZeroRows(l->A,istmp,diag); CHKERR(ierr); 399 ierr = MatZeroRows(l->B,istmp,0); CHKERR(ierr); 400 ierr = ISDestroy(istmp); CHKERR(ierr); 401 402 /* wait on sends */ 403 if (nsends) { 404 send_status = (MPI_Status *) MALLOC( nsends*sizeof(MPI_Status) ); 405 CHKPTR(send_status); 406 MPI_Waitall(nsends,send_waits,send_status); 407 FREE(send_status); 408 } 409 FREE(send_waits); FREE(svalues); 410 411 412 return 0; 413 } 414 415 static int MatiAIJMult(Mat aijin,Vec xx,Vec yy) 416 { 417 Matimpiaij *aij = (Matimpiaij *) aijin->data; 418 int ierr; 419 if (!aij->assembled) SETERR(1,"MatiAIJMul: must assmble matrix first"); 420 ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx); 421 CHKERR(ierr); 422 ierr = MatMult(aij->A,xx,yy); CHKERR(ierr); 423 ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx); 424 CHKERR(ierr); 425 ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERR(ierr); 426 return 0; 427 } 428 429 static int MatiAIJMultadd(Mat aijin,Vec xx,Vec yy,Vec zz) 430 { 431 Matimpiaij *aij = (Matimpiaij *) aijin->data; 432 int ierr; 433 if (!aij->assembled) SETERR(1,"MatiAIJMul: must assmble matrix first"); 434 ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx); 435 CHKERR(ierr); 436 ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERR(ierr); 437 ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx); 438 CHKERR(ierr); 439 ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERR(ierr); 440 return 0; 441 } 442 443 static int MatiAIJMultTrans(Mat aijin,Vec xx,Vec yy) 444 { 445 Matimpiaij *aij = (Matimpiaij *) aijin->data; 446 int ierr; 447 448 if (!aij->assembled) SETERR(1,"MatiAIJMulTrans: must assmble matrix first"); 449 /* do nondiagonal part */ 450 ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr); 451 /* send it on its way */ 452 ierr = VecScatterBegin(aij->lvec,0,yy,0,AddValues, 453 ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr); 454 /* do local part */ 455 ierr = MatMultTrans(aij->A,xx,yy); CHKERR(ierr); 456 /* receive remote parts: note this assumes the values are not actually */ 457 /* inserted in yy until the next line, which is true for my implementation*/ 458 /* but is not perhaps always true. */ 459 ierr = VecScatterEnd(aij->lvec,0,yy,0,AddValues,ScatterAll|ScatterReverse, 460 aij->Mvctx); CHKERR(ierr); 461 return 0; 462 } 463 464 static int MatiAIJMultTransadd(Mat aijin,Vec xx,Vec yy,Vec zz) 465 { 466 Matimpiaij *aij = (Matimpiaij *) aijin->data; 467 int ierr; 468 469 if (!aij->assembled) SETERR(1,"MatiAIJMulTrans: must assmble matrix first"); 470 /* do nondiagonal part */ 471 ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr); 472 /* send it on its way */ 473 ierr = VecScatterBegin(aij->lvec,0,zz,0,AddValues, 474 ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr); 475 /* do local part */ 476 ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERR(ierr); 477 /* receive remote parts: note this assumes the values are not actually */ 478 /* inserted in yy until the next line, which is true for my implementation*/ 479 /* but is not perhaps always true. */ 480 ierr = VecScatterEnd(aij->lvec,0,zz,0,AddValues,ScatterAll|ScatterReverse, 481 aij->Mvctx); CHKERR(ierr); 482 return 0; 483 } 484 485 /* 486 This only works correctly for square matrices where the subblock A->A is the 487 diagonal block 488 */ 489 static int MatiAIJgetdiag(Mat Ain,Vec v) 490 { 491 Matimpiaij *A = (Matimpiaij *) Ain->data; 492 if (!A->assembled) SETERR(1,"MatiAIJgetdiag: must assmble matrix first"); 493 return MatGetDiagonal(A->A,v); 494 } 495 496 static int MatiAIJdestroy(PetscObject obj) 497 { 498 Mat mat = (Mat) obj; 499 Matimpiaij *aij = (Matimpiaij *) mat->data; 500 int ierr; 501 FREE(aij->rowners); 502 ierr = MatDestroy(aij->A); CHKERR(ierr); 503 ierr = MatDestroy(aij->B); CHKERR(ierr); 504 FREE(aij); FREE(mat); 505 if (aij->lvec) VecDestroy(aij->lvec); 506 if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx); 507 return 0; 508 } 509 510 static int MatiView(PetscObject obj,Viewer viewer) 511 { 512 Mat mat = (Mat) obj; 513 Matimpiaij *aij = (Matimpiaij *) mat->data; 514 int ierr; 515 516 if (!aij->assembled) SETERR(1,"MatiAIJMulTrans: must assmble matrix first"); 517 MPE_Seq_begin(mat->comm,1); 518 printf("[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 519 aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 520 aij->cend); 521 ierr = MatView(aij->A,0); CHKERR(ierr); 522 ierr = MatView(aij->B,0); CHKERR(ierr); 523 fflush(stdout); 524 MPE_Seq_end(mat->comm,1); 525 return 0; 526 } 527 528 extern int MatiAIJmarkdiag(Matiaij *); 529 /* 530 This has to provide several versions. 531 532 1) per sequential 533 2) a) use only local smoothing updating outer values only once. 534 b) local smoothing updating outer values each inner iteration 535 3) color updating out values betwen colors. 536 */ 537 static int MatiAIJrelax(Mat matin,Vec bb,double omega,int flag,double shift, 538 int its,Vec xx) 539 { 540 Matimpiaij *mat = (Matimpiaij *) matin->data; 541 Mat AA = mat->A, BB = mat->B; 542 Matiaij *A = (Matiaij *) AA->data, *B = (Matiaij *)BB->data; 543 Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 544 int ierr,*idx, *diag; 545 int n = mat->n, m = mat->m, i; 546 Vec tt; 547 548 if (!mat->assembled) SETERR(1,"MatiAIJRelax: must assmble matrix first"); 549 550 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 551 xs = x -1; /* shift by one for index start of 1 */ 552 ls--; 553 if (!A->diag) {if ((ierr = MatiAIJmarkdiag(A))) return ierr;} 554 diag = A->diag; 555 if (flag & SOR_EISENSTAT) { 556 /* Let A = L + U + D; where L is lower trianglar, 557 U is upper triangular, E is diagonal; This routine applies 558 559 (L + E)^{-1} A (U + E)^{-1} 560 561 to a vector efficiently using Eisenstat's trick. This is for 562 the case of SSOR preconditioner, so E is D/omega where omega 563 is the relaxation factor. 564 */ 565 ierr = VecCreate(xx,&tt); CHKERR(ierr); 566 VecGetArray(tt,&t); 567 scale = (2.0/omega) - 1.0; 568 /* x = (E + U)^{-1} b */ 569 VecSet(&zero,mat->lvec); 570 ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp, 571 mat->Mvctx); CHKERR(ierr); 572 for ( i=m-1; i>-1; i-- ) { 573 n = A->i[i+1] - diag[i] - 1; 574 idx = A->j + diag[i]; 575 v = A->a + diag[i]; 576 sum = b[i]; 577 SPARSEDENSEMDOT(sum,xs,v,idx,n); 578 d = shift + A->a[diag[i]-1]; 579 n = B->i[i+1] - B->i[i]; 580 idx = B->j + B->i[i] - 1; 581 v = B->a + B->i[i] - 1; 582 SPARSEDENSEMDOT(sum,ls,v,idx,n); 583 x[i] = omega*(sum/d); 584 } 585 ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp, 586 mat->Mvctx); CHKERR(ierr); 587 588 /* t = b - (2*E - D)x */ 589 v = A->a; 590 for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; } 591 592 /* t = (E + L)^{-1}t */ 593 ts = t - 1; /* shifted by one for index start of a or mat->j*/ 594 diag = A->diag; 595 VecSet(&zero,mat->lvec); 596 ierr = VecPipelineBegin(tt,0,mat->lvec,0,InsertValues,PipelineDown, 597 mat->Mvctx); CHKERR(ierr); 598 for ( i=0; i<m; i++ ) { 599 n = diag[i] - A->i[i]; 600 idx = A->j + A->i[i] - 1; 601 v = A->a + A->i[i] - 1; 602 sum = t[i]; 603 SPARSEDENSEMDOT(sum,ts,v,idx,n); 604 d = shift + A->a[diag[i]-1]; 605 n = B->i[i+1] - B->i[i]; 606 idx = B->j + B->i[i] - 1; 607 v = B->a + B->i[i] - 1; 608 SPARSEDENSEMDOT(sum,ls,v,idx,n); 609 t[i] = omega*(sum/d); 610 } 611 ierr = VecPipelineEnd(tt,0,mat->lvec,0,InsertValues,PipelineDown, 612 mat->Mvctx); CHKERR(ierr); 613 /* x = x + t */ 614 for ( i=0; i<m; i++ ) { x[i] += t[i]; } 615 VecDestroy(tt); 616 return 0; 617 } 618 619 620 if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 621 if (flag & SOR_ZERO_INITIAL_GUESS) { 622 VecSet(&zero,mat->lvec); VecSet(&zero,xx); 623 } 624 else { 625 ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx); 626 CHKERR(ierr); 627 ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx); 628 CHKERR(ierr); 629 } 630 while (its--) { 631 /* go down through the rows */ 632 ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown, 633 mat->Mvctx); CHKERR(ierr); 634 for ( i=0; i<m; i++ ) { 635 n = A->i[i+1] - A->i[i]; 636 idx = A->j + A->i[i] - 1; 637 v = A->a + A->i[i] - 1; 638 sum = b[i]; 639 SPARSEDENSEMDOT(sum,xs,v,idx,n); 640 d = shift + A->a[diag[i]-1]; 641 n = B->i[i+1] - B->i[i]; 642 idx = B->j + B->i[i] - 1; 643 v = B->a + B->i[i] - 1; 644 SPARSEDENSEMDOT(sum,ls,v,idx,n); 645 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 646 } 647 ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown, 648 mat->Mvctx); CHKERR(ierr); 649 /* come up through the rows */ 650 ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp, 651 mat->Mvctx); CHKERR(ierr); 652 for ( i=m-1; i>-1; i-- ) { 653 n = A->i[i+1] - A->i[i]; 654 idx = A->j + A->i[i] - 1; 655 v = A->a + A->i[i] - 1; 656 sum = b[i]; 657 SPARSEDENSEMDOT(sum,xs,v,idx,n); 658 d = shift + A->a[diag[i]-1]; 659 n = B->i[i+1] - B->i[i]; 660 idx = B->j + B->i[i] - 1; 661 v = B->a + B->i[i] - 1; 662 SPARSEDENSEMDOT(sum,ls,v,idx,n); 663 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 664 } 665 ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp, 666 mat->Mvctx); CHKERR(ierr); 667 } 668 } 669 else if (flag & SOR_FORWARD_SWEEP){ 670 if (flag & SOR_ZERO_INITIAL_GUESS) { 671 VecSet(&zero,mat->lvec); 672 ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown, 673 mat->Mvctx); CHKERR(ierr); 674 for ( i=0; i<m; i++ ) { 675 n = diag[i] - A->i[i]; 676 idx = A->j + A->i[i] - 1; 677 v = A->a + A->i[i] - 1; 678 sum = b[i]; 679 SPARSEDENSEMDOT(sum,xs,v,idx,n); 680 d = shift + A->a[diag[i]-1]; 681 n = B->i[i+1] - B->i[i]; 682 idx = B->j + B->i[i] - 1; 683 v = B->a + B->i[i] - 1; 684 SPARSEDENSEMDOT(sum,ls,v,idx,n); 685 x[i] = omega*(sum/d); 686 } 687 ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown, 688 mat->Mvctx); CHKERR(ierr); 689 its--; 690 } 691 while (its--) { 692 ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx); 693 CHKERR(ierr); 694 ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx); 695 CHKERR(ierr); 696 ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown, 697 mat->Mvctx); CHKERR(ierr); 698 for ( i=0; i<m; i++ ) { 699 n = A->i[i+1] - A->i[i]; 700 idx = A->j + A->i[i] - 1; 701 v = A->a + A->i[i] - 1; 702 sum = b[i]; 703 SPARSEDENSEMDOT(sum,xs,v,idx,n); 704 d = shift + A->a[diag[i]-1]; 705 n = B->i[i+1] - B->i[i]; 706 idx = B->j + B->i[i] - 1; 707 v = B->a + B->i[i] - 1; 708 SPARSEDENSEMDOT(sum,ls,v,idx,n); 709 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 710 } 711 ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown, 712 mat->Mvctx); CHKERR(ierr); 713 } 714 } 715 else if (flag & SOR_BACKWARD_SWEEP){ 716 if (flag & SOR_ZERO_INITIAL_GUESS) { 717 VecSet(&zero,mat->lvec); 718 ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp, 719 mat->Mvctx); CHKERR(ierr); 720 for ( i=m-1; i>-1; i-- ) { 721 n = A->i[i+1] - diag[i] - 1; 722 idx = A->j + diag[i]; 723 v = A->a + diag[i]; 724 sum = b[i]; 725 SPARSEDENSEMDOT(sum,xs,v,idx,n); 726 d = shift + A->a[diag[i]-1]; 727 n = B->i[i+1] - B->i[i]; 728 idx = B->j + B->i[i] - 1; 729 v = B->a + B->i[i] - 1; 730 SPARSEDENSEMDOT(sum,ls,v,idx,n); 731 x[i] = omega*(sum/d); 732 } 733 ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp, 734 mat->Mvctx); CHKERR(ierr); 735 its--; 736 } 737 while (its--) { 738 ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterDown, 739 mat->Mvctx); CHKERR(ierr); 740 ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterDown, 741 mat->Mvctx); CHKERR(ierr); 742 ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp, 743 mat->Mvctx); CHKERR(ierr); 744 for ( i=m-1; i>-1; i-- ) { 745 n = A->i[i+1] - A->i[i]; 746 idx = A->j + A->i[i] - 1; 747 v = A->a + A->i[i] - 1; 748 sum = b[i]; 749 SPARSEDENSEMDOT(sum,xs,v,idx,n); 750 d = shift + A->a[diag[i]-1]; 751 n = B->i[i+1] - B->i[i]; 752 idx = B->j + B->i[i] - 1; 753 v = B->a + B->i[i] - 1; 754 SPARSEDENSEMDOT(sum,ls,v,idx,n); 755 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 756 } 757 ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp, 758 mat->Mvctx); CHKERR(ierr); 759 } 760 } 761 else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 762 if (flag & SOR_ZERO_INITIAL_GUESS) { 763 return MatRelax(mat->A,bb,omega,flag,shift,its,xx); 764 } 765 ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx); 766 CHKERR(ierr); 767 ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx); 768 CHKERR(ierr); 769 while (its--) { 770 /* go down through the rows */ 771 for ( i=0; i<m; i++ ) { 772 n = A->i[i+1] - A->i[i]; 773 idx = A->j + A->i[i] - 1; 774 v = A->a + A->i[i] - 1; 775 sum = b[i]; 776 SPARSEDENSEMDOT(sum,xs,v,idx,n); 777 d = shift + A->a[diag[i]-1]; 778 n = B->i[i+1] - B->i[i]; 779 idx = B->j + B->i[i] - 1; 780 v = B->a + B->i[i] - 1; 781 SPARSEDENSEMDOT(sum,ls,v,idx,n); 782 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 783 } 784 /* come up through the rows */ 785 for ( i=m-1; i>-1; i-- ) { 786 n = A->i[i+1] - A->i[i]; 787 idx = A->j + A->i[i] - 1; 788 v = A->a + A->i[i] - 1; 789 sum = b[i]; 790 SPARSEDENSEMDOT(sum,xs,v,idx,n); 791 d = shift + A->a[diag[i]-1]; 792 n = B->i[i+1] - B->i[i]; 793 idx = B->j + B->i[i] - 1; 794 v = B->a + B->i[i] - 1; 795 SPARSEDENSEMDOT(sum,ls,v,idx,n); 796 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 797 } 798 } 799 } 800 else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 801 if (flag & SOR_ZERO_INITIAL_GUESS) { 802 return MatRelax(mat->A,bb,omega,flag,shift,its,xx); 803 } 804 ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx); 805 CHKERR(ierr); 806 ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx); 807 CHKERR(ierr); 808 while (its--) { 809 for ( i=0; i<m; i++ ) { 810 n = A->i[i+1] - A->i[i]; 811 idx = A->j + A->i[i] - 1; 812 v = A->a + A->i[i] - 1; 813 sum = b[i]; 814 SPARSEDENSEMDOT(sum,xs,v,idx,n); 815 d = shift + A->a[diag[i]-1]; 816 n = B->i[i+1] - B->i[i]; 817 idx = B->j + B->i[i] - 1; 818 v = B->a + B->i[i] - 1; 819 SPARSEDENSEMDOT(sum,ls,v,idx,n); 820 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 821 } 822 } 823 } 824 else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 825 if (flag & SOR_ZERO_INITIAL_GUESS) { 826 return MatRelax(mat->A,bb,omega,flag,shift,its,xx); 827 } 828 ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll, 829 mat->Mvctx); CHKERR(ierr); 830 ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll, 831 mat->Mvctx); CHKERR(ierr); 832 while (its--) { 833 for ( i=m-1; i>-1; i-- ) { 834 n = A->i[i+1] - A->i[i]; 835 idx = A->j + A->i[i] - 1; 836 v = A->a + A->i[i] - 1; 837 sum = b[i]; 838 SPARSEDENSEMDOT(sum,xs,v,idx,n); 839 d = shift + A->a[diag[i]-1]; 840 n = B->i[i+1] - B->i[i]; 841 idx = B->j + B->i[i] - 1; 842 v = B->a + B->i[i] - 1; 843 SPARSEDENSEMDOT(sum,ls,v,idx,n); 844 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 845 } 846 } 847 } 848 return 0; 849 } 850 static int MatiAIJinsopt(Mat aijin,int op) 851 { 852 Matimpiaij *aij = (Matimpiaij *) aijin->data; 853 854 if (op == NO_NEW_NONZERO_LOCATIONS) { 855 MatSetOption(aij->A,op); 856 MatSetOption(aij->B,op); 857 } 858 else if (op == YES_NEW_NONZERO_LOCATIONS) { 859 MatSetOption(aij->A,op); 860 MatSetOption(aij->B,op); 861 } 862 else if (op == COLUMN_ORIENTED) SETERR(1,"Column oriented not supported"); 863 return 0; 864 } 865 866 static int MatiAIJsize(Mat matin,int *m,int *n) 867 { 868 Matimpiaij *mat = (Matimpiaij *) matin->data; 869 *m = mat->M; *n = mat->N; 870 return 0; 871 } 872 873 static int MatiAIJlocalsize(Mat matin,int *m,int *n) 874 { 875 Matimpiaij *mat = (Matimpiaij *) matin->data; 876 *m = mat->m; *n = mat->n; 877 return 0; 878 } 879 880 static int MatiAIJrange(Mat matin,int *m,int *n) 881 { 882 Matimpiaij *mat = (Matimpiaij *) matin->data; 883 *m = mat->rstart; *n = mat->rend; 884 return 0; 885 } 886 887 static int MatiCopy(Mat,Mat *); 888 889 /* -------------------------------------------------------------------*/ 890 static struct _MatOps MatOps = {MatiAIJInsertValues, 891 0, 0, 892 MatiAIJMult,MatiAIJMultadd,MatiAIJMultTrans,MatiAIJMultTransadd, 893 0,0,0,0, 894 0,0, 895 MatiAIJrelax, 896 0, 897 0,0,0, 898 MatiCopy, 899 MatiAIJgetdiag,0,0, 900 MatiAIJBeginAssemble,MatiAIJEndAssemble, 901 0, 902 MatiAIJinsopt,MatiZero,MatiZerorows,0, 903 0,0,0,0, 904 MatiAIJsize,MatiAIJlocalsize,MatiAIJrange }; 905 906 907 908 /*@ 909 910 MatCreateMPIAIJ - Creates a sparse parallel matrix 911 in AIJ format. 912 913 Input Parameters: 914 . comm - MPI communicator 915 . m,n - number of local rows and columns (or -1 to have calculated) 916 . M,N - global rows and columns (or -1 to have calculated) 917 . d_nz - total number nonzeros in diagonal portion of matrix 918 . d_nzz - number of nonzeros per row in diagonal portion of matrix or null 919 . You must leave room for the diagonal entry even if it is zero. 920 . o_nz - total number nonzeros in off-diagonal portion of matrix 921 . o_nzz - number of nonzeros per row in off-diagonal portion of matrix 922 . or null. You must have at least one nonzero per row. 923 924 Output parameters: 925 . newmat - the matrix 926 927 Keywords: matrix, aij, compressed row, sparse, parallel 928 @*/ 929 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 930 int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat) 931 { 932 Mat mat; 933 Matimpiaij *aij; 934 int ierr, i,sum[2],work[2]; 935 *newmat = 0; 936 CREATEHEADER(mat,_Mat); 937 mat->data = (void *) (aij = NEW(Matimpiaij)); CHKPTR(aij); 938 mat->cookie = MAT_COOKIE; 939 mat->ops = &MatOps; 940 mat->destroy = MatiAIJdestroy; 941 mat->view = MatiView; 942 mat->type = MATAIJMPI; 943 mat->factor = 0; 944 mat->row = 0; 945 mat->col = 0; 946 947 mat->comm = comm; 948 aij->insertmode = NotSetValues; 949 MPI_Comm_rank(comm,&aij->mytid); 950 MPI_Comm_size(comm,&aij->numtids); 951 952 if (M == -1 || N == -1) { 953 work[0] = m; work[1] = n; 954 MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm ); 955 if (M == -1) M = sum[0]; 956 if (N == -1) N = sum[1]; 957 } 958 if (m == -1) {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);} 959 if (n == -1) {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);} 960 aij->m = m; 961 aij->n = n; 962 aij->N = N; 963 aij->M = M; 964 965 /* build local table of row and column ownerships */ 966 aij->rowners = (int *) MALLOC(2*(aij->numtids+2)*sizeof(int)); 967 CHKPTR(aij->rowners); 968 aij->cowners = aij->rowners + aij->numtids + 1; 969 MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm); 970 aij->rowners[0] = 0; 971 for ( i=2; i<=aij->numtids; i++ ) { 972 aij->rowners[i] += aij->rowners[i-1]; 973 } 974 aij->rstart = aij->rowners[aij->mytid]; 975 aij->rend = aij->rowners[aij->mytid+1]; 976 MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm); 977 aij->cowners[0] = 0; 978 for ( i=2; i<=aij->numtids; i++ ) { 979 aij->cowners[i] += aij->cowners[i-1]; 980 } 981 aij->cstart = aij->cowners[aij->mytid]; 982 aij->cend = aij->cowners[aij->mytid+1]; 983 984 985 ierr = MatCreateSequentialAIJ(m,n,d_nz,d_nnz,&aij->A); CHKERR(ierr); 986 ierr = MatCreateSequentialAIJ(m,N,o_nz,o_nnz,&aij->B); CHKERR(ierr); 987 988 /* build cache for off array entries formed */ 989 aij->stash.nmax = CHUNCKSIZE; /* completely arbratray number */ 990 aij->stash.n = 0; 991 aij->stash.array = (Scalar *) MALLOC( aij->stash.nmax*(2*sizeof(int) + 992 sizeof(Scalar))); CHKPTR(aij->stash.array); 993 aij->stash.idx = (int *) (aij->stash.array + aij->stash.nmax); 994 aij->stash.idy = (int *) (aij->stash.idx + aij->stash.nmax); 995 996 /* stuff used for matrix vector multiply */ 997 aij->lvec = 0; 998 aij->Mvctx = 0; 999 aij->assembled = 0; 1000 1001 *newmat = mat; 1002 return 0; 1003 } 1004 1005 static int MatiCopy(Mat matin,Mat *newmat) 1006 { 1007 Mat mat; 1008 Matimpiaij *aij,*oldmat = (Matimpiaij *) matin->data; 1009 int ierr; 1010 *newmat = 0; 1011 1012 if (!oldmat->assembled) SETERR(1,"Cannot copy unassembled matrix"); 1013 CREATEHEADER(mat,_Mat); 1014 mat->data = (void *) (aij = NEW(Matimpiaij)); CHKPTR(aij); 1015 mat->cookie = MAT_COOKIE; 1016 mat->ops = &MatOps; 1017 mat->destroy = MatiAIJdestroy; 1018 mat->view = MatiView; 1019 mat->type = MATAIJMPI; 1020 mat->factor = matin->factor; 1021 mat->row = 0; 1022 mat->col = 0; 1023 1024 aij->m = oldmat->m; 1025 aij->n = oldmat->n; 1026 aij->M = oldmat->M; 1027 aij->N = oldmat->N; 1028 1029 aij->assembled = 1; 1030 aij->rstart = oldmat->rstart; 1031 aij->rend = oldmat->rend; 1032 aij->cstart = oldmat->cstart; 1033 aij->cend = oldmat->cend; 1034 aij->numtids = oldmat->numtids; 1035 aij->mytid = oldmat->mytid; 1036 aij->insertmode = NotSetValues; 1037 1038 aij->rowners = (int *) MALLOC( (aij->numtids+1)*sizeof(int) ); 1039 CHKPTR(aij->rowners); 1040 MEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int)); 1041 aij->stash.nmax = 0; 1042 aij->stash.n = 0; 1043 aij->stash.array= 0; 1044 mat->comm = matin->comm; 1045 1046 ierr = VecCreate(oldmat->lvec,&aij->lvec); CHKERR(ierr); 1047 ierr = VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERR(ierr); 1048 ierr = MatCopy(oldmat->A,&aij->A); CHKERR(ierr); 1049 ierr = MatCopy(oldmat->B,&aij->B); CHKERR(ierr); 1050 *newmat = mat; 1051 return 0; 1052 } 1053