1 #ifndef lint 2 static char vcid[] = "$Id: $"; 3 #endif 4 5 #include "mpiaij.h" 6 #include "vec/vecimpl.h" 7 #include "inline/spops.h" 8 9 #define CHUNCKSIZE 100 10 /* 11 This is a simple minded stash. Do a linear search to determine if 12 in stash, if not add to end. 13 */ 14 static int StashValues(Stash *stash,int row,int n, int *idxn, 15 Scalar *values,InsertMode addv) 16 { 17 int i,j,N = stash->n,found,*n_idx, *n_idy; 18 Scalar val,*n_array; 19 20 for ( i=0; i<n; i++ ) { 21 found = 0; 22 val = *values++; 23 for ( j=0; j<N; j++ ) { 24 if ( stash->idx[j] == row && stash->idy[j] == idxn[i]) { 25 /* found a match */ 26 if (addv == AddValues) stash->array[j] += val; 27 else stash->array[j] = val; 28 found = 1; 29 break; 30 } 31 } 32 if (!found) { /* not found so add to end */ 33 if ( stash->n == stash->nmax ) { 34 /* allocate a larger stash */ 35 n_array = (Scalar *) MALLOC( (stash->nmax + CHUNCKSIZE)*( 36 2*sizeof(int) + sizeof(Scalar))); 37 CHKPTR(n_array); 38 n_idx = (int *) (n_array + stash->nmax + CHUNCKSIZE); 39 n_idy = (int *) (n_idx + stash->nmax + CHUNCKSIZE); 40 MEMCPY(n_array,stash->array,stash->nmax*sizeof(Scalar)); 41 MEMCPY(n_idx,stash->idx,stash->nmax*sizeof(int)); 42 MEMCPY(n_idy,stash->idy,stash->nmax*sizeof(int)); 43 if (stash->array) FREE(stash->array); 44 stash->array = n_array; stash->idx = n_idx; stash->idy = n_idy; 45 stash->nmax += CHUNCKSIZE; 46 } 47 stash->array[stash->n] = val; 48 stash->idx[stash->n] = row; 49 stash->idy[stash->n++] = idxn[i]; 50 } 51 } 52 return 0; 53 } 54 55 static int MatiAIJInsertValues(Mat mat,int m,int *idxm,int n, 56 int *idxn,Scalar *v,InsertMode addv) 57 { 58 Matimpiaij *aij = (Matimpiaij *) mat->data; 59 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 60 int cstart = aij->cstart, cend = aij->cend,row,col; 61 62 if (aij->insertmode != NotSetValues && aij->insertmode != addv) { 63 SETERR(1,"You cannot mix inserts and adds"); 64 } 65 aij->insertmode = addv; 66 for ( i=0; i<m; i++ ) { 67 if (idxm[i] < 0) SETERR(1,"Negative row index"); 68 if (idxm[i] >= aij->M) SETERR(1,"Row index too large"); 69 if (idxm[i] >= rstart && idxm[i] < rend) { 70 row = idxm[i] - rstart; 71 for ( j=0; j<n; j++ ) { 72 if (idxn[j] < 0) SETERR(1,"Negative column index"); 73 if (idxn[j] >= aij->N) SETERR(1,"Column index too large"); 74 if (idxn[j] >= cstart && idxn[j] < cend){ 75 col = idxn[j] - cstart; 76 ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr); 77 } 78 else { 79 if (aij->assembled) { 80 SETERR(1,"Cannot insert off diagonal block in already\ 81 assembled matrix. Contact petsc-maint@mcs.anl.gov\ 82 if your need this feature"); 83 } 84 col = idxn[j]; 85 ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr); 86 } 87 } 88 } 89 else { 90 ierr = StashValues(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERR(ierr); 91 } 92 } 93 return 0; 94 } 95 96 /* 97 the assembly code is alot like the code for vectors, we should 98 sometime derive a single assembly code that can be used for 99 either case. 100 */ 101 102 static int MatiAIJBeginAssemble(Mat mat) 103 { 104 Matimpiaij *aij = (Matimpiaij *) mat->data; 105 MPI_Comm comm = mat->comm; 106 int numtids = aij->numtids, *owners = aij->rowners; 107 int mytid = aij->mytid; 108 MPI_Request *send_waits,*recv_waits; 109 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 110 int tag = 50, *owner,*starts,count; 111 InsertMode addv; 112 Scalar *rvalues,*svalues; 113 114 /* make sure all processors are either in INSERTMODE or ADDMODE */ 115 MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,numtids,MPI_INT, 116 MPI_BOR,comm); 117 if (addv == (AddValues|InsertValues)) { 118 SETERR(1,"Some processors have inserted while others have added"); 119 } 120 aij->insertmode = addv; /* in case this processor had no cache */ 121 122 /* first count number of contributors to each processor */ 123 nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs); 124 MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids; 125 owner = (int *) MALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTR(owner); 126 for ( i=0; i<aij->stash.n; i++ ) { 127 idx = aij->stash.idx[i]; 128 for ( j=0; j<numtids; j++ ) { 129 if (idx >= owners[j] && idx < owners[j+1]) { 130 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 131 } 132 } 133 } 134 nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 135 136 /* inform other processors of number of messages and max length*/ 137 work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work); 138 MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm); 139 nreceives = work[mytid]; 140 MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm); 141 nmax = work[mytid]; 142 FREE(work); 143 144 /* post receives: 145 1) each message will consist of ordered pairs 146 (global index,value) we store the global index as a double 147 to simplify the message passing. 148 2) since we don't know how long each individual message is we 149 allocate the largest needed buffer for each receive. Potentially 150 this is a lot of wasted space. 151 152 153 This could be done better. 154 */ 155 rvalues = (Scalar *) MALLOC(3*(nreceives+1)*nmax*sizeof(Scalar)); 156 CHKPTR(rvalues); 157 recv_waits = (MPI_Request *) MALLOC((nreceives+1)*sizeof(MPI_Request)); 158 CHKPTR(recv_waits); 159 for ( i=0; i<nreceives; i++ ) { 160 MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPI_SCALAR,MPI_ANY_SOURCE,tag, 161 comm,recv_waits+i); 162 } 163 164 /* do sends: 165 1) starts[i] gives the starting index in svalues for stuff going to 166 the ith processor 167 */ 168 svalues = (Scalar *) MALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) ); 169 CHKPTR(svalues); 170 send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request)); 171 CHKPTR(send_waits); 172 starts = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(starts); 173 starts[0] = 0; 174 for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 175 for ( i=0; i<aij->stash.n; i++ ) { 176 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 177 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 178 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 179 } 180 FREE(owner); 181 starts[0] = 0; 182 for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 183 count = 0; 184 for ( i=0; i<numtids; i++ ) { 185 if (procs[i]) { 186 MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPI_SCALAR,i,tag, 187 comm,send_waits+count++); 188 } 189 } 190 FREE(starts); FREE(nprocs); 191 192 /* Free cache space */ 193 aij->stash.nmax = aij->stash.n = 0; 194 if (aij->stash.array){ FREE(aij->stash.array); aij->stash.array = 0;} 195 196 aij->svalues = svalues; aij->rvalues = rvalues; 197 aij->nsends = nsends; aij->nrecvs = nreceives; 198 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 199 aij->rmax = nmax; 200 201 return 0; 202 } 203 extern int MPIAIJSetUpMultiply(Mat); 204 205 static int MatiAIJEndAssemble(Mat mat) 206 { 207 int ierr; 208 Matimpiaij *aij = (Matimpiaij *) mat->data; 209 210 MPI_Status *send_status,recv_status; 211 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n; 212 int row,col; 213 Scalar *values,val; 214 InsertMode addv = aij->insertmode; 215 216 /* wait on receives */ 217 while (count) { 218 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 219 /* unpack receives into our local space */ 220 values = aij->rvalues + 3*imdex*aij->rmax; 221 MPI_Get_count(&recv_status,MPI_SCALAR,&n); 222 n = n/3; 223 for ( i=0; i<n; i++ ) { 224 row = (int) PETSCREAL(values[3*i]) - aij->rstart; 225 col = (int) PETSCREAL(values[3*i+1]); 226 val = values[3*i+2]; 227 if (col >= aij->cstart && col < aij->cend) { 228 col -= aij->cstart; 229 MatSetValues(aij->A,1,&row,1,&col,&val,addv); 230 } 231 else { 232 if (aij->assembled) { 233 SETERR(1,"Cannot insert off diagonal block in already\ 234 assembled matrix. Contact petsc-maint@mcs.anl.gov\ 235 if your need this feature"); 236 } 237 MatSetValues(aij->B,1,&row,1,&col,&val,addv); 238 } 239 } 240 count--; 241 } 242 FREE(aij->recv_waits); FREE(aij->rvalues); 243 244 /* wait on sends */ 245 if (aij->nsends) { 246 send_status = (MPI_Status *) MALLOC( aij->nsends*sizeof(MPI_Status) ); 247 CHKPTR(send_status); 248 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 249 FREE(send_status); 250 } 251 FREE(aij->send_waits); FREE(aij->svalues); 252 253 aij->insertmode = NotSetValues; 254 ierr = MatBeginAssembly(aij->A); CHKERR(ierr); 255 ierr = MatEndAssembly(aij->A); CHKERR(ierr); 256 257 258 if (aij->assembled) { 259 /* this is because the present implimentation doesn't support 260 inserting values off the diagonal block.*/ 261 return 0; 262 } 263 264 ierr = MPIAIJSetUpMultiply(mat); CHKERR(ierr); 265 ierr = MatBeginAssembly(aij->B); CHKERR(ierr); 266 ierr = MatEndAssembly(aij->B); CHKERR(ierr); 267 268 aij->assembled = 1; 269 return 0; 270 } 271 272 static int MatiZero(Mat A) 273 { 274 Matimpiaij *l = (Matimpiaij *) A->data; 275 276 MatZeroEntries(l->A); MatZeroEntries(l->B); 277 return 0; 278 } 279 280 /* again this uses the same basic stratagy as in the assembly and 281 scatter create routines, we should try to do it systemamatically 282 if we can figure out the proper level of generality. */ 283 284 /* the code does not do the diagonal entries correctly unless the 285 matrix is square and the column and row owerships are identical. 286 This is a BUG. The only way to fix it seems to be to access 287 aij->A and aij->B directly and not through the MatZeroRows() 288 routine. 289 */ 290 static int MatiZerorows(Mat A,IS is,Scalar *diag) 291 { 292 Matimpiaij *l = (Matimpiaij *) A->data; 293 int i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids; 294 int *procs,*nprocs,j,found,idx,nsends,*work; 295 int nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid; 296 int *rvalues,tag = 67,count,base,slen,n,*source; 297 int *lens,imdex,*lrows,*values; 298 MPI_Comm comm = A->comm; 299 MPI_Request *send_waits,*recv_waits; 300 MPI_Status recv_status,*send_status; 301 IS istmp; 302 303 if (!l->assembled) SETERR(1,"MatiZerorows: must assmble matrix first"); 304 ierr = ISGetLocalSize(is,&N); CHKERR(ierr); 305 ierr = ISGetIndices(is,&rows); CHKERR(ierr); 306 307 /* first count number of contributors to each processor */ 308 nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs); 309 MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids; 310 owner = (int *) MALLOC((N+1)*sizeof(int)); CHKPTR(owner); /* see note*/ 311 for ( i=0; i<N; i++ ) { 312 idx = rows[i]; 313 found = 0; 314 for ( j=0; j<numtids; j++ ) { 315 if (idx >= owners[j] && idx < owners[j+1]) { 316 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 317 } 318 } 319 if (!found) SETERR(1,"Imdex out of range"); 320 } 321 nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 322 323 /* inform other processors of number of messages and max length*/ 324 work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work); 325 MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm); 326 nrecvs = work[mytid]; 327 MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm); 328 nmax = work[mytid]; 329 FREE(work); 330 331 /* post receives: */ 332 rvalues = (int *) MALLOC((nrecvs+1)*nmax*sizeof(int)); /*see note */ 333 CHKPTR(rvalues); 334 recv_waits = (MPI_Request *) MALLOC((nrecvs+1)*sizeof(MPI_Request)); 335 CHKPTR(recv_waits); 336 for ( i=0; i<nrecvs; i++ ) { 337 MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag, 338 comm,recv_waits+i); 339 } 340 341 /* do sends: 342 1) starts[i] gives the starting index in svalues for stuff going to 343 the ith processor 344 */ 345 svalues = (int *) MALLOC( (N+1)*sizeof(int) ); CHKPTR(svalues); 346 send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request)); 347 CHKPTR(send_waits); 348 starts = (int *) MALLOC( (numtids+1)*sizeof(int) ); CHKPTR(starts); 349 starts[0] = 0; 350 for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 351 for ( i=0; i<N; i++ ) { 352 svalues[starts[owner[i]]++] = rows[i]; 353 } 354 ISRestoreIndices(is,&rows); 355 356 starts[0] = 0; 357 for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 358 count = 0; 359 for ( i=0; i<numtids; i++ ) { 360 if (procs[i]) { 361 MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag, 362 comm,send_waits+count++); 363 } 364 } 365 FREE(starts); 366 367 base = owners[mytid]; 368 369 /* wait on receives */ 370 lens = (int *) MALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTR(lens); 371 source = lens + nrecvs; 372 count = nrecvs; slen = 0; 373 while (count) { 374 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 375 /* unpack receives into our local space */ 376 MPI_Get_count(&recv_status,MPI_INT,&n); 377 source[imdex] = recv_status.MPI_SOURCE; 378 lens[imdex] = n; 379 slen += n; 380 count--; 381 } 382 FREE(recv_waits); 383 384 /* move the data into the send scatter */ 385 lrows = (int *) MALLOC( slen*sizeof(int) ); CHKPTR(lrows); 386 count = 0; 387 for ( i=0; i<nrecvs; i++ ) { 388 values = rvalues + i*nmax; 389 for ( j=0; j<lens[i]; j++ ) { 390 lrows[count++] = values[j] - base; 391 } 392 } 393 FREE(rvalues); FREE(lens); 394 FREE(owner); FREE(nprocs); 395 396 /* actually zap the local rows */ 397 ierr = ISCreateSequential(slen,lrows,&istmp); CHKERR(ierr); FREE(lrows); 398 ierr = MatZeroRows(l->A,istmp,diag); CHKERR(ierr); 399 ierr = MatZeroRows(l->B,istmp,0); CHKERR(ierr); 400 ierr = ISDestroy(istmp); CHKERR(ierr); 401 402 /* wait on sends */ 403 if (nsends) { 404 send_status = (MPI_Status *) MALLOC( nsends*sizeof(MPI_Status) ); 405 CHKPTR(send_status); 406 MPI_Waitall(nsends,send_waits,send_status); 407 FREE(send_status); 408 } 409 FREE(send_waits); FREE(svalues); 410 411 412 return 0; 413 } 414 415 static int MatiAIJMult(Mat aijin,Vec xx,Vec yy) 416 { 417 Matimpiaij *aij = (Matimpiaij *) aijin->data; 418 int ierr; 419 if (!aij->assembled) SETERR(1,"MatiAIJMul: must assmble matrix first"); 420 ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx); 421 CHKERR(ierr); 422 ierr = MatMult(aij->A,xx,yy); CHKERR(ierr); 423 ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx); 424 CHKERR(ierr); 425 ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERR(ierr); 426 return 0; 427 } 428 429 static int MatiAIJMultadd(Mat aijin,Vec xx,Vec yy,Vec zz) 430 { 431 Matimpiaij *aij = (Matimpiaij *) aijin->data; 432 int ierr; 433 if (!aij->assembled) SETERR(1,"MatiAIJMul: must assmble matrix first"); 434 ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx); 435 CHKERR(ierr); 436 ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERR(ierr); 437 ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx); 438 CHKERR(ierr); 439 ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERR(ierr); 440 return 0; 441 } 442 443 static int MatiAIJMultTrans(Mat aijin,Vec xx,Vec yy) 444 { 445 Matimpiaij *aij = (Matimpiaij *) aijin->data; 446 int ierr; 447 448 if (!aij->assembled) SETERR(1,"MatiAIJMulTrans: must assmble matrix first"); 449 /* do nondiagonal part */ 450 ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr); 451 /* send it on its way */ 452 ierr = VecScatterBegin(aij->lvec,0,yy,0,AddValues, 453 ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr); 454 /* do local part */ 455 ierr = MatMultTrans(aij->A,xx,yy); CHKERR(ierr); 456 /* receive remote parts: note this assumes the values are not actually */ 457 /* inserted in yy until the next line, which is true for my implementation*/ 458 /* but is not perhaps always true. */ 459 ierr = VecScatterEnd(aij->lvec,0,yy,0,AddValues,ScatterAll|ScatterReverse, 460 aij->Mvctx); CHKERR(ierr); 461 return 0; 462 } 463 464 static int MatiAIJMultTransadd(Mat aijin,Vec xx,Vec yy,Vec zz) 465 { 466 Matimpiaij *aij = (Matimpiaij *) aijin->data; 467 int ierr; 468 469 if (!aij->assembled) SETERR(1,"MatiAIJMulTrans: must assmble matrix first"); 470 /* do nondiagonal part */ 471 ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr); 472 /* send it on its way */ 473 ierr = VecScatterBegin(aij->lvec,0,zz,0,AddValues, 474 ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr); 475 /* do local part */ 476 ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERR(ierr); 477 /* receive remote parts: note this assumes the values are not actually */ 478 /* inserted in yy until the next line, which is true for my implementation*/ 479 /* but is not perhaps always true. */ 480 ierr = VecScatterEnd(aij->lvec,0,zz,0,AddValues,ScatterAll|ScatterReverse, 481 aij->Mvctx); CHKERR(ierr); 482 return 0; 483 } 484 485 /* 486 This only works correctly for square matrices where the subblock A->A is the 487 diagonal block 488 */ 489 static int MatiAIJgetdiag(Mat Ain,Vec v) 490 { 491 Matimpiaij *A = (Matimpiaij *) Ain->data; 492 if (!A->assembled) SETERR(1,"MatiAIJgetdiag: must assmble matrix first"); 493 return MatGetDiagonal(A->A,v); 494 } 495 496 static int MatiAIJdestroy(PetscObject obj) 497 { 498 Mat mat = (Mat) obj; 499 Matimpiaij *aij = (Matimpiaij *) mat->data; 500 int ierr; 501 FREE(aij->rowners); 502 ierr = MatDestroy(aij->A); CHKERR(ierr); 503 ierr = MatDestroy(aij->B); CHKERR(ierr); 504 FREE(aij); FREE(mat); 505 if (aij->lvec) VecDestroy(aij->lvec); 506 if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx); 507 return 0; 508 } 509 510 static int MatiView(PetscObject obj,Viewer viewer) 511 { 512 Mat mat = (Mat) obj; 513 Matimpiaij *aij = (Matimpiaij *) mat->data; 514 int ierr; 515 516 if (!aij->assembled) SETERR(1,"MatiAIJMulTrans: must assmble matrix first"); 517 MPE_Seq_begin(mat->comm,1); 518 printf("[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 519 aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 520 aij->cend); 521 ierr = MatView(aij->A,viewer); CHKERR(ierr); 522 ierr = MatView(aij->B,viewer); CHKERR(ierr); 523 MPE_Seq_end(mat->comm,1); 524 return 0; 525 } 526 527 extern int MatiAIJmarkdiag(Matiaij *); 528 /* 529 This has to provide several versions. 530 531 1) per sequential 532 2) a) use only local smoothing updating outer values only once. 533 b) local smoothing updating outer values each inner iteration 534 3) color updating out values betwen colors. 535 */ 536 static int MatiAIJrelax(Mat matin,Vec bb,double omega,int flag,double shift, 537 int its,Vec xx) 538 { 539 Matimpiaij *mat = (Matimpiaij *) matin->data; 540 Mat AA = mat->A, BB = mat->B; 541 Matiaij *A = (Matiaij *) AA->data, *B = (Matiaij *)BB->data; 542 Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 543 int ierr,*idx, *diag; 544 int n = mat->n, m = mat->m, i; 545 Vec tt; 546 547 if (!mat->assembled) SETERR(1,"MatiAIJRelax: must assmble matrix first"); 548 549 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 550 xs = x -1; /* shift by one for index start of 1 */ 551 ls--; 552 if (!A->diag) {if ((ierr = MatiAIJmarkdiag(A))) return ierr;} 553 diag = A->diag; 554 if (flag & SOR_EISENSTAT) { 555 /* Let A = L + U + D; where L is lower trianglar, 556 U is upper triangular, E is diagonal; This routine applies 557 558 (L + E)^{-1} A (U + E)^{-1} 559 560 to a vector efficiently using Eisenstat's trick. This is for 561 the case of SSOR preconditioner, so E is D/omega where omega 562 is the relaxation factor. 563 */ 564 ierr = VecCreate(xx,&tt); CHKERR(ierr); 565 VecGetArray(tt,&t); 566 scale = (2.0/omega) - 1.0; 567 /* x = (E + U)^{-1} b */ 568 VecSet(&zero,mat->lvec); 569 ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp, 570 mat->Mvctx); CHKERR(ierr); 571 for ( i=m-1; i>-1; i-- ) { 572 n = A->i[i+1] - diag[i] - 1; 573 idx = A->j + diag[i]; 574 v = A->a + diag[i]; 575 sum = b[i]; 576 SPARSEDENSEMDOT(sum,xs,v,idx,n); 577 d = shift + A->a[diag[i]-1]; 578 n = B->i[i+1] - B->i[i]; 579 idx = B->j + B->i[i] - 1; 580 v = B->a + B->i[i] - 1; 581 SPARSEDENSEMDOT(sum,ls,v,idx,n); 582 x[i] = omega*(sum/d); 583 } 584 ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp, 585 mat->Mvctx); CHKERR(ierr); 586 587 /* t = b - (2*E - D)x */ 588 v = A->a; 589 for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; } 590 591 /* t = (E + L)^{-1}t */ 592 ts = t - 1; /* shifted by one for index start of a or mat->j*/ 593 diag = A->diag; 594 VecSet(&zero,mat->lvec); 595 ierr = VecPipelineBegin(tt,0,mat->lvec,0,InsertValues,PipelineDown, 596 mat->Mvctx); CHKERR(ierr); 597 for ( i=0; i<m; i++ ) { 598 n = diag[i] - A->i[i]; 599 idx = A->j + A->i[i] - 1; 600 v = A->a + A->i[i] - 1; 601 sum = t[i]; 602 SPARSEDENSEMDOT(sum,ts,v,idx,n); 603 d = shift + A->a[diag[i]-1]; 604 n = B->i[i+1] - B->i[i]; 605 idx = B->j + B->i[i] - 1; 606 v = B->a + B->i[i] - 1; 607 SPARSEDENSEMDOT(sum,ls,v,idx,n); 608 t[i] = omega*(sum/d); 609 } 610 ierr = VecPipelineEnd(tt,0,mat->lvec,0,InsertValues,PipelineDown, 611 mat->Mvctx); CHKERR(ierr); 612 /* x = x + t */ 613 for ( i=0; i<m; i++ ) { x[i] += t[i]; } 614 VecDestroy(tt); 615 return 0; 616 } 617 618 619 if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 620 if (flag & SOR_ZERO_INITIAL_GUESS) { 621 VecSet(&zero,mat->lvec); VecSet(&zero,xx); 622 } 623 else { 624 ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx); 625 CHKERR(ierr); 626 ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx); 627 CHKERR(ierr); 628 } 629 while (its--) { 630 /* go down through the rows */ 631 ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown, 632 mat->Mvctx); CHKERR(ierr); 633 for ( i=0; i<m; i++ ) { 634 n = A->i[i+1] - A->i[i]; 635 idx = A->j + A->i[i] - 1; 636 v = A->a + A->i[i] - 1; 637 sum = b[i]; 638 SPARSEDENSEMDOT(sum,xs,v,idx,n); 639 d = shift + A->a[diag[i]-1]; 640 n = B->i[i+1] - B->i[i]; 641 idx = B->j + B->i[i] - 1; 642 v = B->a + B->i[i] - 1; 643 SPARSEDENSEMDOT(sum,ls,v,idx,n); 644 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 645 } 646 ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown, 647 mat->Mvctx); CHKERR(ierr); 648 /* come up through the rows */ 649 ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp, 650 mat->Mvctx); CHKERR(ierr); 651 for ( i=m-1; i>-1; i-- ) { 652 n = A->i[i+1] - A->i[i]; 653 idx = A->j + A->i[i] - 1; 654 v = A->a + A->i[i] - 1; 655 sum = b[i]; 656 SPARSEDENSEMDOT(sum,xs,v,idx,n); 657 d = shift + A->a[diag[i]-1]; 658 n = B->i[i+1] - B->i[i]; 659 idx = B->j + B->i[i] - 1; 660 v = B->a + B->i[i] - 1; 661 SPARSEDENSEMDOT(sum,ls,v,idx,n); 662 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 663 } 664 ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp, 665 mat->Mvctx); CHKERR(ierr); 666 } 667 } 668 else if (flag & SOR_FORWARD_SWEEP){ 669 if (flag & SOR_ZERO_INITIAL_GUESS) { 670 VecSet(&zero,mat->lvec); 671 ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown, 672 mat->Mvctx); CHKERR(ierr); 673 for ( i=0; i<m; i++ ) { 674 n = diag[i] - A->i[i]; 675 idx = A->j + A->i[i] - 1; 676 v = A->a + A->i[i] - 1; 677 sum = b[i]; 678 SPARSEDENSEMDOT(sum,xs,v,idx,n); 679 d = shift + A->a[diag[i]-1]; 680 n = B->i[i+1] - B->i[i]; 681 idx = B->j + B->i[i] - 1; 682 v = B->a + B->i[i] - 1; 683 SPARSEDENSEMDOT(sum,ls,v,idx,n); 684 x[i] = omega*(sum/d); 685 } 686 ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown, 687 mat->Mvctx); CHKERR(ierr); 688 its--; 689 } 690 while (its--) { 691 ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx); 692 CHKERR(ierr); 693 ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx); 694 CHKERR(ierr); 695 ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown, 696 mat->Mvctx); CHKERR(ierr); 697 for ( i=0; i<m; i++ ) { 698 n = A->i[i+1] - A->i[i]; 699 idx = A->j + A->i[i] - 1; 700 v = A->a + A->i[i] - 1; 701 sum = b[i]; 702 SPARSEDENSEMDOT(sum,xs,v,idx,n); 703 d = shift + A->a[diag[i]-1]; 704 n = B->i[i+1] - B->i[i]; 705 idx = B->j + B->i[i] - 1; 706 v = B->a + B->i[i] - 1; 707 SPARSEDENSEMDOT(sum,ls,v,idx,n); 708 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 709 } 710 ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown, 711 mat->Mvctx); CHKERR(ierr); 712 } 713 } 714 else if (flag & SOR_BACKWARD_SWEEP){ 715 if (flag & SOR_ZERO_INITIAL_GUESS) { 716 VecSet(&zero,mat->lvec); 717 ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp, 718 mat->Mvctx); CHKERR(ierr); 719 for ( i=m-1; i>-1; i-- ) { 720 n = A->i[i+1] - diag[i] - 1; 721 idx = A->j + diag[i]; 722 v = A->a + diag[i]; 723 sum = b[i]; 724 SPARSEDENSEMDOT(sum,xs,v,idx,n); 725 d = shift + A->a[diag[i]-1]; 726 n = B->i[i+1] - B->i[i]; 727 idx = B->j + B->i[i] - 1; 728 v = B->a + B->i[i] - 1; 729 SPARSEDENSEMDOT(sum,ls,v,idx,n); 730 x[i] = omega*(sum/d); 731 } 732 ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp, 733 mat->Mvctx); CHKERR(ierr); 734 its--; 735 } 736 while (its--) { 737 ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterDown, 738 mat->Mvctx); CHKERR(ierr); 739 ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterDown, 740 mat->Mvctx); CHKERR(ierr); 741 ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp, 742 mat->Mvctx); CHKERR(ierr); 743 for ( i=m-1; i>-1; i-- ) { 744 n = A->i[i+1] - A->i[i]; 745 idx = A->j + A->i[i] - 1; 746 v = A->a + A->i[i] - 1; 747 sum = b[i]; 748 SPARSEDENSEMDOT(sum,xs,v,idx,n); 749 d = shift + A->a[diag[i]-1]; 750 n = B->i[i+1] - B->i[i]; 751 idx = B->j + B->i[i] - 1; 752 v = B->a + B->i[i] - 1; 753 SPARSEDENSEMDOT(sum,ls,v,idx,n); 754 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 755 } 756 ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp, 757 mat->Mvctx); CHKERR(ierr); 758 } 759 } 760 else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 761 if (flag & SOR_ZERO_INITIAL_GUESS) { 762 return MatRelax(mat->A,bb,omega,flag,shift,its,xx); 763 } 764 ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx); 765 CHKERR(ierr); 766 ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx); 767 CHKERR(ierr); 768 while (its--) { 769 /* go down through the rows */ 770 for ( i=0; i<m; i++ ) { 771 n = A->i[i+1] - A->i[i]; 772 idx = A->j + A->i[i] - 1; 773 v = A->a + A->i[i] - 1; 774 sum = b[i]; 775 SPARSEDENSEMDOT(sum,xs,v,idx,n); 776 d = shift + A->a[diag[i]-1]; 777 n = B->i[i+1] - B->i[i]; 778 idx = B->j + B->i[i] - 1; 779 v = B->a + B->i[i] - 1; 780 SPARSEDENSEMDOT(sum,ls,v,idx,n); 781 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 782 } 783 /* come up through the rows */ 784 for ( i=m-1; i>-1; i-- ) { 785 n = A->i[i+1] - A->i[i]; 786 idx = A->j + A->i[i] - 1; 787 v = A->a + A->i[i] - 1; 788 sum = b[i]; 789 SPARSEDENSEMDOT(sum,xs,v,idx,n); 790 d = shift + A->a[diag[i]-1]; 791 n = B->i[i+1] - B->i[i]; 792 idx = B->j + B->i[i] - 1; 793 v = B->a + B->i[i] - 1; 794 SPARSEDENSEMDOT(sum,ls,v,idx,n); 795 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 796 } 797 } 798 } 799 else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 800 if (flag & SOR_ZERO_INITIAL_GUESS) { 801 return MatRelax(mat->A,bb,omega,flag,shift,its,xx); 802 } 803 ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx); 804 CHKERR(ierr); 805 ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx); 806 CHKERR(ierr); 807 while (its--) { 808 for ( i=0; i<m; i++ ) { 809 n = A->i[i+1] - A->i[i]; 810 idx = A->j + A->i[i] - 1; 811 v = A->a + A->i[i] - 1; 812 sum = b[i]; 813 SPARSEDENSEMDOT(sum,xs,v,idx,n); 814 d = shift + A->a[diag[i]-1]; 815 n = B->i[i+1] - B->i[i]; 816 idx = B->j + B->i[i] - 1; 817 v = B->a + B->i[i] - 1; 818 SPARSEDENSEMDOT(sum,ls,v,idx,n); 819 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 820 } 821 } 822 } 823 else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 824 if (flag & SOR_ZERO_INITIAL_GUESS) { 825 return MatRelax(mat->A,bb,omega,flag,shift,its,xx); 826 } 827 ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll, 828 mat->Mvctx); CHKERR(ierr); 829 ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll, 830 mat->Mvctx); CHKERR(ierr); 831 while (its--) { 832 for ( i=m-1; i>-1; i-- ) { 833 n = A->i[i+1] - A->i[i]; 834 idx = A->j + A->i[i] - 1; 835 v = A->a + A->i[i] - 1; 836 sum = b[i]; 837 SPARSEDENSEMDOT(sum,xs,v,idx,n); 838 d = shift + A->a[diag[i]-1]; 839 n = B->i[i+1] - B->i[i]; 840 idx = B->j + B->i[i] - 1; 841 v = B->a + B->i[i] - 1; 842 SPARSEDENSEMDOT(sum,ls,v,idx,n); 843 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 844 } 845 } 846 } 847 return 0; 848 } 849 static int MatiAIJinsopt(Mat aijin,int op) 850 { 851 Matimpiaij *aij = (Matimpiaij *) aijin->data; 852 853 if (op == NO_NEW_NONZERO_LOCATIONS) { 854 MatSetOption(aij->A,op); 855 MatSetOption(aij->B,op); 856 } 857 else if (op == YES_NEW_NONZERO_LOCATIONS) { 858 MatSetOption(aij->A,op); 859 MatSetOption(aij->B,op); 860 } 861 else if (op == COLUMN_ORIENTED) SETERR(1,"Column oriented not supported"); 862 return 0; 863 } 864 865 static int MatiAIJsize(Mat matin,int *m,int *n) 866 { 867 Matimpiaij *mat = (Matimpiaij *) matin->data; 868 *m = mat->M; *n = mat->N; 869 return 0; 870 } 871 872 static int MatiAIJlocalsize(Mat matin,int *m,int *n) 873 { 874 Matimpiaij *mat = (Matimpiaij *) matin->data; 875 *m = mat->m; *n = mat->n; 876 return 0; 877 } 878 879 static int MatiAIJrange(Mat matin,int *m,int *n) 880 { 881 Matimpiaij *mat = (Matimpiaij *) matin->data; 882 *m = mat->rstart; *n = mat->rend; 883 return 0; 884 } 885 886 static int MatiCopy(Mat,Mat *); 887 888 /* -------------------------------------------------------------------*/ 889 static struct _MatOps MatOps = {MatiAIJInsertValues, 890 0, 0, 891 MatiAIJMult,MatiAIJMultadd,MatiAIJMultTrans,MatiAIJMultTransadd, 892 0,0,0,0, 893 0,0, 894 MatiAIJrelax, 895 0, 896 0,0,0, 897 MatiCopy, 898 MatiAIJgetdiag,0,0, 899 MatiAIJBeginAssemble,MatiAIJEndAssemble, 900 0, 901 MatiAIJinsopt,MatiZero,MatiZerorows,0, 902 0,0,0,0, 903 MatiAIJsize,MatiAIJlocalsize,MatiAIJrange }; 904 905 906 907 /*@ 908 909 MatCreateMPIAIJ - Creates a sparse parallel matrix 910 in AIJ format. 911 912 Input Parameters: 913 . comm - MPI communicator 914 . m,n - number of local rows and columns (or -1 to have calculated) 915 . M,N - global rows and columns (or -1 to have calculated) 916 . d_nz - total number nonzeros in diagonal portion of matrix 917 . d_nzz - number of nonzeros per row in diagonal portion of matrix or null 918 . You must leave room for the diagonal entry even if it is zero. 919 . o_nz - total number nonzeros in off-diagonal portion of matrix 920 . o_nzz - number of nonzeros per row in off-diagonal portion of matrix 921 . or null. You must have at least one nonzero per row. 922 923 Output parameters: 924 . newmat - the matrix 925 926 Keywords: matrix, aij, compressed row, sparse, parallel 927 @*/ 928 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 929 int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat) 930 { 931 Mat mat; 932 Matimpiaij *aij; 933 int ierr, i,sum[2],work[2]; 934 *newmat = 0; 935 CREATEHEADER(mat,_Mat); 936 mat->data = (void *) (aij = NEW(Matimpiaij)); CHKPTR(aij); 937 mat->cookie = MAT_COOKIE; 938 mat->ops = &MatOps; 939 mat->destroy = MatiAIJdestroy; 940 mat->view = MatiView; 941 mat->type = MATAIJMPI; 942 mat->factor = 0; 943 mat->row = 0; 944 mat->col = 0; 945 946 mat->comm = comm; 947 aij->insertmode = NotSetValues; 948 MPI_Comm_rank(comm,&aij->mytid); 949 MPI_Comm_size(comm,&aij->numtids); 950 951 if (M == -1 || N == -1) { 952 work[0] = m; work[1] = n; 953 MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm ); 954 if (M == -1) M = sum[0]; 955 if (N == -1) N = sum[1]; 956 } 957 if (m == -1) {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);} 958 if (n == -1) {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);} 959 aij->m = m; 960 aij->n = n; 961 aij->N = N; 962 aij->M = M; 963 964 /* build local table of row and column ownerships */ 965 aij->rowners = (int *) MALLOC(2*(aij->numtids+2)*sizeof(int)); 966 CHKPTR(aij->rowners); 967 aij->cowners = aij->rowners + aij->numtids + 1; 968 MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm); 969 aij->rowners[0] = 0; 970 for ( i=2; i<=aij->numtids; i++ ) { 971 aij->rowners[i] += aij->rowners[i-1]; 972 } 973 aij->rstart = aij->rowners[aij->mytid]; 974 aij->rend = aij->rowners[aij->mytid+1]; 975 MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm); 976 aij->cowners[0] = 0; 977 for ( i=2; i<=aij->numtids; i++ ) { 978 aij->cowners[i] += aij->cowners[i-1]; 979 } 980 aij->cstart = aij->cowners[aij->mytid]; 981 aij->cend = aij->cowners[aij->mytid+1]; 982 983 984 ierr = MatCreateSequentialAIJ(m,n,d_nz,d_nnz,&aij->A); CHKERR(ierr); 985 ierr = MatCreateSequentialAIJ(m,N,o_nz,o_nnz,&aij->B); CHKERR(ierr); 986 987 /* build cache for off array entries formed */ 988 aij->stash.nmax = CHUNCKSIZE; /* completely arbratray number */ 989 aij->stash.n = 0; 990 aij->stash.array = (Scalar *) MALLOC( aij->stash.nmax*(2*sizeof(int) + 991 sizeof(Scalar))); CHKPTR(aij->stash.array); 992 aij->stash.idx = (int *) (aij->stash.array + aij->stash.nmax); 993 aij->stash.idy = (int *) (aij->stash.idx + aij->stash.nmax); 994 995 /* stuff used for matrix vector multiply */ 996 aij->lvec = 0; 997 aij->Mvctx = 0; 998 aij->assembled = 0; 999 1000 *newmat = mat; 1001 return 0; 1002 } 1003 1004 static int MatiCopy(Mat matin,Mat *newmat) 1005 { 1006 Mat mat; 1007 Matimpiaij *aij,*oldmat = (Matimpiaij *) matin->data; 1008 int ierr; 1009 *newmat = 0; 1010 1011 if (!oldmat->assembled) SETERR(1,"Cannot copy unassembled matrix"); 1012 CREATEHEADER(mat,_Mat); 1013 mat->data = (void *) (aij = NEW(Matimpiaij)); CHKPTR(aij); 1014 mat->cookie = MAT_COOKIE; 1015 mat->ops = &MatOps; 1016 mat->destroy = MatiAIJdestroy; 1017 mat->view = MatiView; 1018 mat->type = MATAIJSEQ; 1019 mat->factor = matin->factor; 1020 mat->row = 0; 1021 mat->col = 0; 1022 1023 aij->m = oldmat->m; 1024 aij->n = oldmat->n; 1025 aij->M = oldmat->M; 1026 aij->N = oldmat->N; 1027 1028 aij->assembled = 1; 1029 aij->rstart = oldmat->rstart; 1030 aij->rend = oldmat->rend; 1031 aij->cstart = oldmat->cstart; 1032 aij->cend = oldmat->cend; 1033 aij->numtids = oldmat->numtids; 1034 aij->mytid = oldmat->mytid; 1035 aij->insertmode = NotSetValues; 1036 1037 aij->rowners = (int *) MALLOC( (aij->numtids+1)*sizeof(int) ); 1038 CHKPTR(aij->rowners); 1039 MEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int)); 1040 aij->stash.nmax = 0; 1041 aij->stash.n = 0; 1042 aij->stash.array= 0; 1043 mat->comm = matin->comm; 1044 1045 ierr = VecCreate(oldmat->lvec,&aij->lvec); CHKERR(ierr); 1046 ierr = VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERR(ierr); 1047 ierr = MatCopy(oldmat->A,&aij->A); CHKERR(ierr); 1048 ierr = MatCopy(oldmat->B,&aij->B); CHKERR(ierr); 1049 *newmat = mat; 1050 return 0; 1051 } 1052