1 #ifndef lint 2 static char vcid[] = "$Id: mpiaij.c,v 1.19 1995/03/26 04:43:10 bsmith Exp bsmith $"; 3 #endif 4 5 #include "mpiaij.h" 6 #include "vec/vecimpl.h" 7 #include "inline/spops.h" 8 9 #define CHUNCKSIZE 100 10 /* 11 This is a simple minded stash. Do a linear search to determine if 12 in stash, if not add to end. 13 */ 14 static int StashValues(Stash *stash,int row,int n, int *idxn, 15 Scalar *values,InsertMode addv) 16 { 17 int i,j,N = stash->n,found,*n_idx, *n_idy; 18 Scalar val,*n_array; 19 20 for ( i=0; i<n; i++ ) { 21 found = 0; 22 val = *values++; 23 for ( j=0; j<N; j++ ) { 24 if ( stash->idx[j] == row && stash->idy[j] == idxn[i]) { 25 /* found a match */ 26 if (addv == AddValues) stash->array[j] += val; 27 else stash->array[j] = val; 28 found = 1; 29 break; 30 } 31 } 32 if (!found) { /* not found so add to end */ 33 if ( stash->n == stash->nmax ) { 34 /* allocate a larger stash */ 35 n_array = (Scalar *) MALLOC( (stash->nmax + CHUNCKSIZE)*( 36 2*sizeof(int) + sizeof(Scalar))); 37 CHKPTR(n_array); 38 n_idx = (int *) (n_array + stash->nmax + CHUNCKSIZE); 39 n_idy = (int *) (n_idx + stash->nmax + CHUNCKSIZE); 40 MEMCPY(n_array,stash->array,stash->nmax*sizeof(Scalar)); 41 MEMCPY(n_idx,stash->idx,stash->nmax*sizeof(int)); 42 MEMCPY(n_idy,stash->idy,stash->nmax*sizeof(int)); 43 if (stash->array) FREE(stash->array); 44 stash->array = n_array; stash->idx = n_idx; stash->idy = n_idy; 45 stash->nmax += CHUNCKSIZE; 46 } 47 stash->array[stash->n] = val; 48 stash->idx[stash->n] = row; 49 stash->idy[stash->n++] = idxn[i]; 50 } 51 } 52 return 0; 53 } 54 55 /* local utility routine that creates a mapping from the global column 56 number to the local number in the off-diagonal part of the local 57 storage of the matrix. This is done in a non scable way since the 58 length of colmap equals the global matrix length. 59 */ 60 static int CreateColmap(Mat mat) 61 { 62 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 63 Mat_AIJ *B = (Mat_AIJ*) aij->B->data; 64 int n = B->n,i; 65 aij->colmap = (int *) MALLOC( aij->N*sizeof(int) ); CHKPTR(aij->colmap); 66 MEMSET(aij->colmap,0,aij->N*sizeof(int)); 67 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 68 return 0; 69 } 70 71 static int MatInsertValues_MPIAIJ(Mat mat,int m,int *idxm,int n, 72 int *idxn,Scalar *v,InsertMode addv) 73 { 74 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 75 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 76 int cstart = aij->cstart, cend = aij->cend,row,col; 77 78 if (aij->insertmode != NotSetValues && aij->insertmode != addv) { 79 SETERR(1,"You cannot mix inserts and adds"); 80 } 81 aij->insertmode = addv; 82 for ( i=0; i<m; i++ ) { 83 if (idxm[i] < 0) SETERR(1,"Negative row index"); 84 if (idxm[i] >= aij->M) SETERR(1,"Row index too large"); 85 if (idxm[i] >= rstart && idxm[i] < rend) { 86 row = idxm[i] - rstart; 87 for ( j=0; j<n; j++ ) { 88 if (idxn[j] < 0) SETERR(1,"Negative column index"); 89 if (idxn[j] >= aij->N) SETERR(1,"Column index too large"); 90 if (idxn[j] >= cstart && idxn[j] < cend){ 91 col = idxn[j] - cstart; 92 ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr); 93 } 94 else { 95 if (aij->assembled) { 96 if (!aij->colmap) {ierr = CreateColmap(mat); CHKERR(ierr);} 97 col = aij->colmap[idxn[j]] - 1; 98 if (col < 0) { 99 SETERR(1,"Cannot insert new off diagonal block nonzero in\ 100 already\ 101 assembled matrix. Contact petsc-maint@mcs.anl.gov\ 102 if your need this feature"); 103 } 104 } 105 else col = idxn[j]; 106 ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr); 107 } 108 } 109 } 110 else { 111 ierr = StashValues(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERR(ierr); 112 } 113 } 114 return 0; 115 } 116 117 /* 118 the assembly code is alot like the code for vectors, we should 119 sometime derive a single assembly code that can be used for 120 either case. 121 */ 122 123 static int MatBeginAssemble_MPIAIJ(Mat mat) 124 { 125 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 126 MPI_Comm comm = mat->comm; 127 int numtids = aij->numtids, *owners = aij->rowners; 128 int mytid = aij->mytid; 129 MPI_Request *send_waits,*recv_waits; 130 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 131 int tag = 50, *owner,*starts,count; 132 InsertMode addv; 133 Scalar *rvalues,*svalues; 134 135 /* make sure all processors are either in INSERTMODE or ADDMODE */ 136 MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT, 137 MPI_BOR,comm); 138 if (addv == (AddValues|InsertValues)) { 139 SETERR(1,"Some processors have inserted while others have added"); 140 } 141 aij->insertmode = addv; /* in case this processor had no cache */ 142 143 /* first count number of contributors to each processor */ 144 nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs); 145 MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids; 146 owner = (int *) MALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTR(owner); 147 for ( i=0; i<aij->stash.n; i++ ) { 148 idx = aij->stash.idx[i]; 149 for ( j=0; j<numtids; j++ ) { 150 if (idx >= owners[j] && idx < owners[j+1]) { 151 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 152 } 153 } 154 } 155 nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 156 157 /* inform other processors of number of messages and max length*/ 158 work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work); 159 MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm); 160 nreceives = work[mytid]; 161 MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm); 162 nmax = work[mytid]; 163 FREE(work); 164 165 /* post receives: 166 1) each message will consist of ordered pairs 167 (global index,value) we store the global index as a double 168 to simplify the message passing. 169 2) since we don't know how long each individual message is we 170 allocate the largest needed buffer for each receive. Potentially 171 this is a lot of wasted space. 172 173 174 This could be done better. 175 */ 176 rvalues = (Scalar *) MALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 177 CHKPTR(rvalues); 178 recv_waits = (MPI_Request *) MALLOC((nreceives+1)*sizeof(MPI_Request)); 179 CHKPTR(recv_waits); 180 for ( i=0; i<nreceives; i++ ) { 181 MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPI_SCALAR,MPI_ANY_SOURCE,tag, 182 comm,recv_waits+i); 183 } 184 185 /* do sends: 186 1) starts[i] gives the starting index in svalues for stuff going to 187 the ith processor 188 */ 189 svalues = (Scalar *) MALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) ); 190 CHKPTR(svalues); 191 send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request)); 192 CHKPTR(send_waits); 193 starts = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(starts); 194 starts[0] = 0; 195 for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 196 for ( i=0; i<aij->stash.n; i++ ) { 197 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 198 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 199 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 200 } 201 FREE(owner); 202 starts[0] = 0; 203 for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 204 count = 0; 205 for ( i=0; i<numtids; i++ ) { 206 if (procs[i]) { 207 MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPI_SCALAR,i,tag, 208 comm,send_waits+count++); 209 } 210 } 211 FREE(starts); FREE(nprocs); 212 213 /* Free cache space */ 214 aij->stash.nmax = aij->stash.n = 0; 215 if (aij->stash.array){ FREE(aij->stash.array); aij->stash.array = 0;} 216 217 aij->svalues = svalues; aij->rvalues = rvalues; 218 aij->nsends = nsends; aij->nrecvs = nreceives; 219 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 220 aij->rmax = nmax; 221 222 return 0; 223 } 224 extern int MatSetUpMultiply_MPIAIJ(Mat); 225 226 static int MatEndAssemble_MPIAIJ(Mat mat) 227 { 228 int ierr; 229 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 230 231 MPI_Status *send_status,recv_status; 232 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n; 233 int row,col; 234 Scalar *values,val; 235 InsertMode addv = aij->insertmode; 236 237 /* wait on receives */ 238 while (count) { 239 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 240 /* unpack receives into our local space */ 241 values = aij->rvalues + 3*imdex*aij->rmax; 242 MPI_Get_count(&recv_status,MPI_SCALAR,&n); 243 n = n/3; 244 for ( i=0; i<n; i++ ) { 245 row = (int) PETSCREAL(values[3*i]) - aij->rstart; 246 col = (int) PETSCREAL(values[3*i+1]); 247 val = values[3*i+2]; 248 if (col >= aij->cstart && col < aij->cend) { 249 col -= aij->cstart; 250 MatSetValues(aij->A,1,&row,1,&col,&val,addv); 251 } 252 else { 253 if (aij->assembled) { 254 if (!aij->colmap) {ierr = CreateColmap(mat); CHKERR(ierr);} 255 col = aij->colmap[col] - 1; 256 if (col < 0) { 257 SETERR(1,"Cannot insert new off diagonal block nonzero in\ 258 already\ 259 assembled matrix. Contact petsc-maint@mcs.anl.gov\ 260 if your need this feature"); 261 } 262 } 263 MatSetValues(aij->B,1,&row,1,&col,&val,addv); 264 } 265 } 266 count--; 267 } 268 FREE(aij->recv_waits); FREE(aij->rvalues); 269 270 /* wait on sends */ 271 if (aij->nsends) { 272 send_status = (MPI_Status *) MALLOC( aij->nsends*sizeof(MPI_Status) ); 273 CHKPTR(send_status); 274 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 275 FREE(send_status); 276 } 277 FREE(aij->send_waits); FREE(aij->svalues); 278 279 aij->insertmode = NotSetValues; 280 ierr = MatBeginAssembly(aij->A); CHKERR(ierr); 281 ierr = MatEndAssembly(aij->A); CHKERR(ierr); 282 283 if (!aij->assembled) { 284 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERR(ierr); 285 } 286 ierr = MatBeginAssembly(aij->B); CHKERR(ierr); 287 ierr = MatEndAssembly(aij->B); CHKERR(ierr); 288 289 aij->assembled = 1; 290 return 0; 291 } 292 293 static int MatZero_MPIAIJ(Mat A) 294 { 295 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 296 297 MatZeroEntries(l->A); MatZeroEntries(l->B); 298 return 0; 299 } 300 301 /* again this uses the same basic stratagy as in the assembly and 302 scatter create routines, we should try to do it systemamatically 303 if we can figure out the proper level of generality. */ 304 305 /* the code does not do the diagonal entries correctly unless the 306 matrix is square and the column and row owerships are identical. 307 This is a BUG. The only way to fix it seems to be to access 308 aij->A and aij->B directly and not through the MatZeroRows() 309 routine. 310 */ 311 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 312 { 313 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 314 int i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids; 315 int *procs,*nprocs,j,found,idx,nsends,*work; 316 int nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid; 317 int *rvalues,tag = 67,count,base,slen,n,*source; 318 int *lens,imdex,*lrows,*values; 319 MPI_Comm comm = A->comm; 320 MPI_Request *send_waits,*recv_waits; 321 MPI_Status recv_status,*send_status; 322 IS istmp; 323 324 if (!l->assembled) SETERR(1,"MatZeroRows_MPIAIJ: must assemble matrix first"); 325 ierr = ISGetLocalSize(is,&N); CHKERR(ierr); 326 ierr = ISGetIndices(is,&rows); CHKERR(ierr); 327 328 /* first count number of contributors to each processor */ 329 nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs); 330 MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids; 331 owner = (int *) MALLOC((N+1)*sizeof(int)); CHKPTR(owner); /* see note*/ 332 for ( i=0; i<N; i++ ) { 333 idx = rows[i]; 334 found = 0; 335 for ( j=0; j<numtids; j++ ) { 336 if (idx >= owners[j] && idx < owners[j+1]) { 337 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 338 } 339 } 340 if (!found) SETERR(1,"Imdex out of range"); 341 } 342 nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 343 344 /* inform other processors of number of messages and max length*/ 345 work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work); 346 MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm); 347 nrecvs = work[mytid]; 348 MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm); 349 nmax = work[mytid]; 350 FREE(work); 351 352 /* post receives: */ 353 rvalues = (int *) MALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 354 CHKPTR(rvalues); 355 recv_waits = (MPI_Request *) MALLOC((nrecvs+1)*sizeof(MPI_Request)); 356 CHKPTR(recv_waits); 357 for ( i=0; i<nrecvs; i++ ) { 358 MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag, 359 comm,recv_waits+i); 360 } 361 362 /* do sends: 363 1) starts[i] gives the starting index in svalues for stuff going to 364 the ith processor 365 */ 366 svalues = (int *) MALLOC( (N+1)*sizeof(int) ); CHKPTR(svalues); 367 send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request)); 368 CHKPTR(send_waits); 369 starts = (int *) MALLOC( (numtids+1)*sizeof(int) ); CHKPTR(starts); 370 starts[0] = 0; 371 for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 372 for ( i=0; i<N; i++ ) { 373 svalues[starts[owner[i]]++] = rows[i]; 374 } 375 ISRestoreIndices(is,&rows); 376 377 starts[0] = 0; 378 for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 379 count = 0; 380 for ( i=0; i<numtids; i++ ) { 381 if (procs[i]) { 382 MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag, 383 comm,send_waits+count++); 384 } 385 } 386 FREE(starts); 387 388 base = owners[mytid]; 389 390 /* wait on receives */ 391 lens = (int *) MALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTR(lens); 392 source = lens + nrecvs; 393 count = nrecvs; slen = 0; 394 while (count) { 395 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 396 /* unpack receives into our local space */ 397 MPI_Get_count(&recv_status,MPI_INT,&n); 398 source[imdex] = recv_status.MPI_SOURCE; 399 lens[imdex] = n; 400 slen += n; 401 count--; 402 } 403 FREE(recv_waits); 404 405 /* move the data into the send scatter */ 406 lrows = (int *) MALLOC( slen*sizeof(int) ); CHKPTR(lrows); 407 count = 0; 408 for ( i=0; i<nrecvs; i++ ) { 409 values = rvalues + i*nmax; 410 for ( j=0; j<lens[i]; j++ ) { 411 lrows[count++] = values[j] - base; 412 } 413 } 414 FREE(rvalues); FREE(lens); 415 FREE(owner); FREE(nprocs); 416 417 /* actually zap the local rows */ 418 ierr = ISCreateSequential(slen,lrows,&istmp); CHKERR(ierr); FREE(lrows); 419 ierr = MatZeroRows(l->A,istmp,diag); CHKERR(ierr); 420 ierr = MatZeroRows(l->B,istmp,0); CHKERR(ierr); 421 ierr = ISDestroy(istmp); CHKERR(ierr); 422 423 /* wait on sends */ 424 if (nsends) { 425 send_status = (MPI_Status *) MALLOC( nsends*sizeof(MPI_Status) ); 426 CHKPTR(send_status); 427 MPI_Waitall(nsends,send_waits,send_status); 428 FREE(send_status); 429 } 430 FREE(send_waits); FREE(svalues); 431 432 433 return 0; 434 } 435 436 static int MatMult_MPIAIJ(Mat aijin,Vec xx,Vec yy) 437 { 438 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 439 int ierr; 440 if (!aij->assembled) SETERR(1,"MatMult_MPIAIJ: must assemble matrix first"); 441 ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx); 442 CHKERR(ierr); 443 ierr = MatMult(aij->A,xx,yy); CHKERR(ierr); 444 ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx); 445 CHKERR(ierr); 446 ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERR(ierr); 447 return 0; 448 } 449 450 static int MatMultAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz) 451 { 452 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 453 int ierr; 454 if (!aij->assembled) SETERR(1,"MatMult_MPIAIJ: must assemble matrix first"); 455 ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx); 456 CHKERR(ierr); 457 ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERR(ierr); 458 ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx); 459 CHKERR(ierr); 460 ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERR(ierr); 461 return 0; 462 } 463 464 static int MatMultTrans_MPIAIJ(Mat aijin,Vec xx,Vec yy) 465 { 466 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 467 int ierr; 468 469 if (!aij->assembled) 470 SETERR(1,"MatMulTrans_MPIAIJ: must assemble matrix first"); 471 /* do nondiagonal part */ 472 ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr); 473 /* send it on its way */ 474 ierr = VecScatterBegin(aij->lvec,0,yy,0,AddValues, 475 ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr); 476 /* do local part */ 477 ierr = MatMultTrans(aij->A,xx,yy); CHKERR(ierr); 478 /* receive remote parts: note this assumes the values are not actually */ 479 /* inserted in yy until the next line, which is true for my implementation*/ 480 /* but is not perhaps always true. */ 481 ierr = VecScatterEnd(aij->lvec,0,yy,0,AddValues,ScatterAll|ScatterReverse, 482 aij->Mvctx); CHKERR(ierr); 483 return 0; 484 } 485 486 static int MatMultTransAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz) 487 { 488 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 489 int ierr; 490 491 if (!aij->assembled) 492 SETERR(1,"MatMulTransAdd_MPIAIJ: must assemble matrix first"); 493 /* do nondiagonal part */ 494 ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr); 495 /* send it on its way */ 496 ierr = VecScatterBegin(aij->lvec,0,zz,0,AddValues, 497 ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr); 498 /* do local part */ 499 ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERR(ierr); 500 /* receive remote parts: note this assumes the values are not actually */ 501 /* inserted in yy until the next line, which is true for my implementation*/ 502 /* but is not perhaps always true. */ 503 ierr = VecScatterEnd(aij->lvec,0,zz,0,AddValues,ScatterAll|ScatterReverse, 504 aij->Mvctx); CHKERR(ierr); 505 return 0; 506 } 507 508 /* 509 This only works correctly for square matrices where the subblock A->A is the 510 diagonal block 511 */ 512 static int MatGetDiag_MPIAIJ(Mat Ain,Vec v) 513 { 514 Mat_MPIAIJ *A = (Mat_MPIAIJ *) Ain->data; 515 if (!A->assembled) SETERR(1,"MatGetDiag_MPIAIJ: must assemble matrix first"); 516 return MatGetDiagonal(A->A,v); 517 } 518 519 static int MatDestroy_MPIAIJ(PetscObject obj) 520 { 521 Mat mat = (Mat) obj; 522 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 523 int ierr; 524 #if defined(PETSC_LOG) 525 PLogObjectState(obj,"Rows %d Cols %d",aij->M,aij->N); 526 #endif 527 FREE(aij->rowners); 528 ierr = MatDestroy(aij->A); CHKERR(ierr); 529 ierr = MatDestroy(aij->B); CHKERR(ierr); 530 if (aij->colmap) FREE(aij->colmap); 531 if (aij->garray) FREE(aij->garray); 532 if (aij->lvec) VecDestroy(aij->lvec); 533 if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx); 534 FREE(aij); 535 PLogObjectDestroy(mat); 536 PETSCHEADERDESTROY(mat); 537 return 0; 538 } 539 540 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 541 { 542 Mat mat = (Mat) obj; 543 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 544 int ierr; 545 PetscObject vobj = (PetscObject) viewer; 546 547 if (!aij->assembled) SETERR(1,"MatView_MPIAIJ: must assemble matrix first"); 548 if (vobj->cookie == VIEWER_COOKIE) { 549 FILE *fd = ViewerFileGetPointer(viewer); 550 if (vobj->type == FILE_VIEWER) { 551 MPE_Seq_begin(mat->comm,1); 552 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 553 aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 554 aij->cend); 555 ierr = MatView(aij->A,viewer); CHKERR(ierr); 556 ierr = MatView(aij->B,viewer); CHKERR(ierr); 557 fflush(fd); 558 MPE_Seq_end(mat->comm,1); 559 } 560 else if (vobj->type == FILES_VIEWER) { 561 int numtids = aij->numtids, mytid = aij->mytid; 562 if (numtids == 1) { 563 ierr = MatView(aij->A,viewer); CHKERR(ierr); 564 } 565 else { 566 /* assemble the entire matrix onto first processor. */ 567 Mat A; 568 Mat_AIJ *Aaij; 569 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 570 Scalar *a; 571 if (!mytid) { 572 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A); 573 } 574 else { 575 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A); 576 } 577 CHKERR(ierr); 578 579 /* copy over the A part */ 580 Aaij = (Mat_AIJ*) aij->A->data; 581 m = Aaij->m; ai = Aaij->i; aj = Aaij->j; a = Aaij->a; 582 row = aij->rstart; 583 for ( i=0; i<ai[m]; i++ ) {aj[i] += aij->cstart - 1;} 584 for ( i=0; i<m; i++ ) { 585 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,InsertValues); 586 CHKERR(ierr); 587 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 588 } 589 aj = Aaij->j; 590 for ( i=0; i<ai[m]; i++ ) {aj[i] -= aij->cstart - 1;} 591 592 /* copy over the B part */ 593 Aaij = (Mat_AIJ*) aij->B->data; 594 m = Aaij->m; ai = Aaij->i; aj = Aaij->j; a = Aaij->a; 595 row = aij->rstart; 596 ct = cols = (int *) MALLOC( (ai[m]+1)*sizeof(int) ); CHKPTR(cols); 597 for ( i=0; i<ai[m]; i++ ) {cols[i] = aij->garray[aj[i]-1];} 598 for ( i=0; i<m; i++ ) { 599 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,InsertValues); 600 CHKERR(ierr); 601 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 602 } 603 FREE(ct); 604 605 ierr = MatBeginAssembly(A); CHKERR(ierr); 606 ierr = MatEndAssembly(A); CHKERR(ierr); 607 if (!mytid) { 608 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERR(ierr); 609 } 610 ierr = MatDestroy(A); CHKERR(ierr); 611 } 612 } 613 } 614 return 0; 615 } 616 617 extern int MatMarkDiag_AIJ(Mat_AIJ *); 618 /* 619 This has to provide several versions. 620 621 1) per sequential 622 2) a) use only local smoothing updating outer values only once. 623 b) local smoothing updating outer values each inner iteration 624 3) color updating out values betwen colors. 625 */ 626 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,int flag,double shift, 627 int its,Vec xx) 628 { 629 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 630 Mat AA = mat->A, BB = mat->B; 631 Mat_AIJ *A = (Mat_AIJ *) AA->data, *B = (Mat_AIJ *)BB->data; 632 Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 633 int ierr,*idx, *diag; 634 int n = mat->n, m = mat->m, i; 635 Vec tt; 636 637 if (!mat->assembled) SETERR(1,"MatRelax_MPIAIJ: must assemble matrix first"); 638 639 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 640 xs = x -1; /* shift by one for index start of 1 */ 641 ls--; 642 if (!A->diag) {if ((ierr = MatMarkDiag_AIJ(A))) return ierr;} 643 diag = A->diag; 644 if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 645 SETERR(1,"That option not yet support for parallel AIJ matrices"); 646 } 647 if (flag & SOR_EISENSTAT) { 648 /* Let A = L + U + D; where L is lower trianglar, 649 U is upper triangular, E is diagonal; This routine applies 650 651 (L + E)^{-1} A (U + E)^{-1} 652 653 to a vector efficiently using Eisenstat's trick. This is for 654 the case of SSOR preconditioner, so E is D/omega where omega 655 is the relaxation factor. 656 */ 657 ierr = VecCreate(xx,&tt); CHKERR(ierr); 658 VecGetArray(tt,&t); 659 scale = (2.0/omega) - 1.0; 660 /* x = (E + U)^{-1} b */ 661 VecSet(&zero,mat->lvec); 662 ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp, 663 mat->Mvctx); CHKERR(ierr); 664 for ( i=m-1; i>-1; i-- ) { 665 n = A->i[i+1] - diag[i] - 1; 666 idx = A->j + diag[i]; 667 v = A->a + diag[i]; 668 sum = b[i]; 669 SPARSEDENSEMDOT(sum,xs,v,idx,n); 670 d = shift + A->a[diag[i]-1]; 671 n = B->i[i+1] - B->i[i]; 672 idx = B->j + B->i[i] - 1; 673 v = B->a + B->i[i] - 1; 674 SPARSEDENSEMDOT(sum,ls,v,idx,n); 675 x[i] = omega*(sum/d); 676 } 677 ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp, 678 mat->Mvctx); CHKERR(ierr); 679 680 /* t = b - (2*E - D)x */ 681 v = A->a; 682 for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; } 683 684 /* t = (E + L)^{-1}t */ 685 ts = t - 1; /* shifted by one for index start of a or mat->j*/ 686 diag = A->diag; 687 VecSet(&zero,mat->lvec); 688 ierr = VecPipelineBegin(tt,0,mat->lvec,0,InsertValues,PipelineDown, 689 mat->Mvctx); CHKERR(ierr); 690 for ( i=0; i<m; i++ ) { 691 n = diag[i] - A->i[i]; 692 idx = A->j + A->i[i] - 1; 693 v = A->a + A->i[i] - 1; 694 sum = t[i]; 695 SPARSEDENSEMDOT(sum,ts,v,idx,n); 696 d = shift + A->a[diag[i]-1]; 697 n = B->i[i+1] - B->i[i]; 698 idx = B->j + B->i[i] - 1; 699 v = B->a + B->i[i] - 1; 700 SPARSEDENSEMDOT(sum,ls,v,idx,n); 701 t[i] = omega*(sum/d); 702 } 703 ierr = VecPipelineEnd(tt,0,mat->lvec,0,InsertValues,PipelineDown, 704 mat->Mvctx); CHKERR(ierr); 705 /* x = x + t */ 706 for ( i=0; i<m; i++ ) { x[i] += t[i]; } 707 VecDestroy(tt); 708 return 0; 709 } 710 711 712 if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 713 if (flag & SOR_ZERO_INITIAL_GUESS) { 714 VecSet(&zero,mat->lvec); VecSet(&zero,xx); 715 } 716 else { 717 ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx); 718 CHKERR(ierr); 719 ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx); 720 CHKERR(ierr); 721 } 722 while (its--) { 723 /* go down through the rows */ 724 ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown, 725 mat->Mvctx); CHKERR(ierr); 726 for ( i=0; i<m; i++ ) { 727 n = A->i[i+1] - A->i[i]; 728 idx = A->j + A->i[i] - 1; 729 v = A->a + A->i[i] - 1; 730 sum = b[i]; 731 SPARSEDENSEMDOT(sum,xs,v,idx,n); 732 d = shift + A->a[diag[i]-1]; 733 n = B->i[i+1] - B->i[i]; 734 idx = B->j + B->i[i] - 1; 735 v = B->a + B->i[i] - 1; 736 SPARSEDENSEMDOT(sum,ls,v,idx,n); 737 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 738 } 739 ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown, 740 mat->Mvctx); CHKERR(ierr); 741 /* come up through the rows */ 742 ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp, 743 mat->Mvctx); CHKERR(ierr); 744 for ( i=m-1; i>-1; i-- ) { 745 n = A->i[i+1] - A->i[i]; 746 idx = A->j + A->i[i] - 1; 747 v = A->a + A->i[i] - 1; 748 sum = b[i]; 749 SPARSEDENSEMDOT(sum,xs,v,idx,n); 750 d = shift + A->a[diag[i]-1]; 751 n = B->i[i+1] - B->i[i]; 752 idx = B->j + B->i[i] - 1; 753 v = B->a + B->i[i] - 1; 754 SPARSEDENSEMDOT(sum,ls,v,idx,n); 755 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 756 } 757 ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp, 758 mat->Mvctx); CHKERR(ierr); 759 } 760 } 761 else if (flag & SOR_FORWARD_SWEEP){ 762 if (flag & SOR_ZERO_INITIAL_GUESS) { 763 VecSet(&zero,mat->lvec); 764 ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown, 765 mat->Mvctx); CHKERR(ierr); 766 for ( i=0; i<m; i++ ) { 767 n = diag[i] - A->i[i]; 768 idx = A->j + A->i[i] - 1; 769 v = A->a + A->i[i] - 1; 770 sum = b[i]; 771 SPARSEDENSEMDOT(sum,xs,v,idx,n); 772 d = shift + A->a[diag[i]-1]; 773 n = B->i[i+1] - B->i[i]; 774 idx = B->j + B->i[i] - 1; 775 v = B->a + B->i[i] - 1; 776 SPARSEDENSEMDOT(sum,ls,v,idx,n); 777 x[i] = omega*(sum/d); 778 } 779 ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown, 780 mat->Mvctx); CHKERR(ierr); 781 its--; 782 } 783 while (its--) { 784 ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx); 785 CHKERR(ierr); 786 ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx); 787 CHKERR(ierr); 788 ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown, 789 mat->Mvctx); CHKERR(ierr); 790 for ( i=0; i<m; i++ ) { 791 n = A->i[i+1] - A->i[i]; 792 idx = A->j + A->i[i] - 1; 793 v = A->a + A->i[i] - 1; 794 sum = b[i]; 795 SPARSEDENSEMDOT(sum,xs,v,idx,n); 796 d = shift + A->a[diag[i]-1]; 797 n = B->i[i+1] - B->i[i]; 798 idx = B->j + B->i[i] - 1; 799 v = B->a + B->i[i] - 1; 800 SPARSEDENSEMDOT(sum,ls,v,idx,n); 801 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 802 } 803 ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown, 804 mat->Mvctx); CHKERR(ierr); 805 } 806 } 807 else if (flag & SOR_BACKWARD_SWEEP){ 808 if (flag & SOR_ZERO_INITIAL_GUESS) { 809 VecSet(&zero,mat->lvec); 810 ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp, 811 mat->Mvctx); CHKERR(ierr); 812 for ( i=m-1; i>-1; i-- ) { 813 n = A->i[i+1] - diag[i] - 1; 814 idx = A->j + diag[i]; 815 v = A->a + diag[i]; 816 sum = b[i]; 817 SPARSEDENSEMDOT(sum,xs,v,idx,n); 818 d = shift + A->a[diag[i]-1]; 819 n = B->i[i+1] - B->i[i]; 820 idx = B->j + B->i[i] - 1; 821 v = B->a + B->i[i] - 1; 822 SPARSEDENSEMDOT(sum,ls,v,idx,n); 823 x[i] = omega*(sum/d); 824 } 825 ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp, 826 mat->Mvctx); CHKERR(ierr); 827 its--; 828 } 829 while (its--) { 830 ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterDown, 831 mat->Mvctx); CHKERR(ierr); 832 ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterDown, 833 mat->Mvctx); CHKERR(ierr); 834 ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp, 835 mat->Mvctx); CHKERR(ierr); 836 for ( i=m-1; i>-1; i-- ) { 837 n = A->i[i+1] - A->i[i]; 838 idx = A->j + A->i[i] - 1; 839 v = A->a + A->i[i] - 1; 840 sum = b[i]; 841 SPARSEDENSEMDOT(sum,xs,v,idx,n); 842 d = shift + A->a[diag[i]-1]; 843 n = B->i[i+1] - B->i[i]; 844 idx = B->j + B->i[i] - 1; 845 v = B->a + B->i[i] - 1; 846 SPARSEDENSEMDOT(sum,ls,v,idx,n); 847 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 848 } 849 ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp, 850 mat->Mvctx); CHKERR(ierr); 851 } 852 } 853 else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 854 if (flag & SOR_ZERO_INITIAL_GUESS) { 855 return MatRelax(mat->A,bb,omega,flag,shift,its,xx); 856 } 857 ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx); 858 CHKERR(ierr); 859 ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx); 860 CHKERR(ierr); 861 while (its--) { 862 /* go down through the rows */ 863 for ( i=0; i<m; i++ ) { 864 n = A->i[i+1] - A->i[i]; 865 idx = A->j + A->i[i] - 1; 866 v = A->a + A->i[i] - 1; 867 sum = b[i]; 868 SPARSEDENSEMDOT(sum,xs,v,idx,n); 869 d = shift + A->a[diag[i]-1]; 870 n = B->i[i+1] - B->i[i]; 871 idx = B->j + B->i[i] - 1; 872 v = B->a + B->i[i] - 1; 873 SPARSEDENSEMDOT(sum,ls,v,idx,n); 874 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 875 } 876 /* come up through the rows */ 877 for ( i=m-1; i>-1; i-- ) { 878 n = A->i[i+1] - A->i[i]; 879 idx = A->j + A->i[i] - 1; 880 v = A->a + A->i[i] - 1; 881 sum = b[i]; 882 SPARSEDENSEMDOT(sum,xs,v,idx,n); 883 d = shift + A->a[diag[i]-1]; 884 n = B->i[i+1] - B->i[i]; 885 idx = B->j + B->i[i] - 1; 886 v = B->a + B->i[i] - 1; 887 SPARSEDENSEMDOT(sum,ls,v,idx,n); 888 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 889 } 890 } 891 } 892 else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 893 if (flag & SOR_ZERO_INITIAL_GUESS) { 894 return MatRelax(mat->A,bb,omega,flag,shift,its,xx); 895 } 896 ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx); 897 CHKERR(ierr); 898 ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx); 899 CHKERR(ierr); 900 while (its--) { 901 for ( i=0; i<m; i++ ) { 902 n = A->i[i+1] - A->i[i]; 903 idx = A->j + A->i[i] - 1; 904 v = A->a + A->i[i] - 1; 905 sum = b[i]; 906 SPARSEDENSEMDOT(sum,xs,v,idx,n); 907 d = shift + A->a[diag[i]-1]; 908 n = B->i[i+1] - B->i[i]; 909 idx = B->j + B->i[i] - 1; 910 v = B->a + B->i[i] - 1; 911 SPARSEDENSEMDOT(sum,ls,v,idx,n); 912 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 913 } 914 } 915 } 916 else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 917 if (flag & SOR_ZERO_INITIAL_GUESS) { 918 return MatRelax(mat->A,bb,omega,flag,shift,its,xx); 919 } 920 ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll, 921 mat->Mvctx); CHKERR(ierr); 922 ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll, 923 mat->Mvctx); CHKERR(ierr); 924 while (its--) { 925 for ( i=m-1; i>-1; i-- ) { 926 n = A->i[i+1] - A->i[i]; 927 idx = A->j + A->i[i] - 1; 928 v = A->a + A->i[i] - 1; 929 sum = b[i]; 930 SPARSEDENSEMDOT(sum,xs,v,idx,n); 931 d = shift + A->a[diag[i]-1]; 932 n = B->i[i+1] - B->i[i]; 933 idx = B->j + B->i[i] - 1; 934 v = B->a + B->i[i] - 1; 935 SPARSEDENSEMDOT(sum,ls,v,idx,n); 936 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 937 } 938 } 939 } 940 return 0; 941 } 942 static int MatInsOpt_MPIAIJ(Mat aijin,int op) 943 { 944 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 945 946 if (op == NO_NEW_NONZERO_LOCATIONS) { 947 MatSetOption(aij->A,op); 948 MatSetOption(aij->B,op); 949 } 950 else if (op == YES_NEW_NONZERO_LOCATIONS) { 951 MatSetOption(aij->A,op); 952 MatSetOption(aij->B,op); 953 } 954 else if (op == COLUMN_ORIENTED) SETERR(1,"Column oriented not supported"); 955 return 0; 956 } 957 958 static int MatSize_MPIAIJ(Mat matin,int *m,int *n) 959 { 960 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 961 *m = mat->M; *n = mat->N; 962 return 0; 963 } 964 965 static int MatLocalSize_MPIAIJ(Mat matin,int *m,int *n) 966 { 967 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 968 *m = mat->m; *n = mat->n; 969 return 0; 970 } 971 972 static int MatRange_MPIAIJ(Mat matin,int *m,int *n) 973 { 974 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 975 *m = mat->rstart; *n = mat->rend; 976 return 0; 977 } 978 979 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 980 { 981 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) matin->data; 982 Scalar *vworkA, *vworkB; 983 int ierr, *cworkA, *cworkB, lrow, cstart = aij->cstart; 984 int nztot, nzA, nzB, i, rstart = aij->rstart, rend = aij->rend; 985 986 if (!aij->assembled) 987 SETERR(1,"MatGetRow_MPIAIJ: Must assemble matrix first."); 988 if (row < rstart || row >= rend) 989 SETERR(1,"MatGetRow_MPIAIJ: Currently you can get only local rows.") 990 lrow = row - rstart; 991 ierr = MatGetRow(aij->A,lrow,&nzA,&cworkA,&vworkA); CHKERR(ierr); 992 for (i=0; i<nzA; i++) cworkA[i] += cstart; 993 ierr = MatGetRow(aij->B,lrow,&nzB,&cworkB,&vworkB); CHKERR(ierr); 994 for (i=0; i<nzB; i++) cworkB[i] = aij->garray[cworkB[i]]; 995 996 if ((nztot = nzA + nzB)) { 997 *idx = (int *) MALLOC( (nztot)*sizeof(int) ); CHKPTR(*idx); 998 *v = (Scalar *) MALLOC( (nztot)*sizeof(Scalar) ); CHKPTR(*v); 999 for ( i=0; i<nzA; i++ ) { 1000 (*idx)[i] = cworkA[i]; 1001 (*v)[i] = vworkA[i]; 1002 } 1003 for ( i=0; i<nzB; i++ ) { 1004 (*idx)[i+nzA] = cworkB[i]; 1005 (*v)[i+nzA] = vworkB[i]; 1006 } 1007 } 1008 else {*idx = 0; *v=0;} 1009 *nz = nztot; 1010 ierr = MatRestoreRow(aij->A,lrow,&nzA,&cworkA,&vworkA); CHKERR(ierr); 1011 ierr = MatRestoreRow(aij->B,lrow,&nzB,&cworkB,&vworkB); CHKERR(ierr); 1012 return 0; 1013 } 1014 1015 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1016 { 1017 if (*idx) FREE(*idx); 1018 if (*v) FREE(*v); 1019 return 0; 1020 } 1021 1022 static int MatCopy_MPIAIJ(Mat,Mat *); 1023 extern int MatConvert_MPIAIJ(Mat,MATTYPE,Mat *); 1024 1025 /* -------------------------------------------------------------------*/ 1026 static struct _MatOps MatOps = {MatInsertValues_MPIAIJ, 1027 MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 1028 MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 1029 MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1030 0,0,0,0, 1031 0,0, 1032 MatRelax_MPIAIJ, 1033 0, 1034 0,0,0, 1035 MatCopy_MPIAIJ, 1036 MatGetDiag_MPIAIJ,0,0, 1037 MatBeginAssemble_MPIAIJ,MatEndAssemble_MPIAIJ, 1038 0, 1039 MatInsOpt_MPIAIJ,MatZero_MPIAIJ,MatZeroRows_MPIAIJ,0, 1040 0,0,0,0, 1041 MatSize_MPIAIJ,MatLocalSize_MPIAIJ,MatRange_MPIAIJ, 1042 0,0, 1043 0,MatConvert_MPIAIJ }; 1044 1045 /*@ 1046 1047 MatCreateMPIAIJ - Creates a sparse parallel matrix 1048 in AIJ format. 1049 1050 Input Parameters: 1051 . comm - MPI communicator 1052 . m,n - number of local rows and columns (or -1 to have calculated) 1053 . M,N - global rows and columns (or -1 to have calculated) 1054 . d_nz - total number nonzeros in diagonal portion of matrix 1055 . d_nzz - number of nonzeros per row in diagonal portion of matrix or null 1056 . You must leave room for the diagonal entry even if it is zero. 1057 . o_nz - total number nonzeros in off-diagonal portion of matrix 1058 . o_nzz - number of nonzeros per row in off-diagonal portion of matrix 1059 . or null. You must have at least one nonzero per row. 1060 1061 Output parameters: 1062 . newmat - the matrix 1063 1064 Keywords: matrix, aij, compressed row, sparse, parallel 1065 @*/ 1066 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 1067 int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat) 1068 { 1069 Mat mat; 1070 Mat_MPIAIJ *aij; 1071 int ierr, i,sum[2],work[2]; 1072 *newmat = 0; 1073 PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1074 PLogObjectCreate(mat); 1075 mat->data = (void *) (aij = NEW(Mat_MPIAIJ)); CHKPTR(aij); 1076 mat->ops = &MatOps; 1077 mat->destroy = MatDestroy_MPIAIJ; 1078 mat->view = MatView_MPIAIJ; 1079 mat->factor = 0; 1080 mat->row = 0; 1081 mat->col = 0; 1082 1083 mat->comm = comm; 1084 aij->insertmode = NotSetValues; 1085 MPI_Comm_rank(comm,&aij->mytid); 1086 MPI_Comm_size(comm,&aij->numtids); 1087 1088 if (M == -1 || N == -1) { 1089 work[0] = m; work[1] = n; 1090 MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm ); 1091 if (M == -1) M = sum[0]; 1092 if (N == -1) N = sum[1]; 1093 } 1094 if (m == -1) {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);} 1095 if (n == -1) {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);} 1096 aij->m = m; 1097 aij->n = n; 1098 aij->N = N; 1099 aij->M = M; 1100 1101 /* build local table of row and column ownerships */ 1102 aij->rowners = (int *) MALLOC(2*(aij->numtids+2)*sizeof(int)); 1103 CHKPTR(aij->rowners); 1104 aij->cowners = aij->rowners + aij->numtids + 1; 1105 MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm); 1106 aij->rowners[0] = 0; 1107 for ( i=2; i<=aij->numtids; i++ ) { 1108 aij->rowners[i] += aij->rowners[i-1]; 1109 } 1110 aij->rstart = aij->rowners[aij->mytid]; 1111 aij->rend = aij->rowners[aij->mytid+1]; 1112 MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm); 1113 aij->cowners[0] = 0; 1114 for ( i=2; i<=aij->numtids; i++ ) { 1115 aij->cowners[i] += aij->cowners[i-1]; 1116 } 1117 aij->cstart = aij->cowners[aij->mytid]; 1118 aij->cend = aij->cowners[aij->mytid+1]; 1119 1120 1121 ierr = MatCreateSequentialAIJ(m,n,d_nz,d_nnz,&aij->A); CHKERR(ierr); 1122 PLogObjectParent(mat,aij->A); 1123 ierr = MatCreateSequentialAIJ(m,N,o_nz,o_nnz,&aij->B); CHKERR(ierr); 1124 PLogObjectParent(mat,aij->B); 1125 1126 /* build cache for off array entries formed */ 1127 aij->stash.nmax = CHUNCKSIZE; /* completely arbratray number */ 1128 aij->stash.n = 0; 1129 aij->stash.array = (Scalar *) MALLOC( aij->stash.nmax*(2*sizeof(int) + 1130 sizeof(Scalar))); CHKPTR(aij->stash.array); 1131 aij->stash.idx = (int *) (aij->stash.array + aij->stash.nmax); 1132 aij->stash.idy = (int *) (aij->stash.idx + aij->stash.nmax); 1133 aij->colmap = 0; 1134 aij->garray = 0; 1135 1136 /* stuff used for matrix vector multiply */ 1137 aij->lvec = 0; 1138 aij->Mvctx = 0; 1139 aij->assembled = 0; 1140 1141 *newmat = mat; 1142 return 0; 1143 } 1144 1145 static int MatCopy_MPIAIJ(Mat matin,Mat *newmat) 1146 { 1147 Mat mat; 1148 Mat_MPIAIJ *aij,*oldmat = (Mat_MPIAIJ *) matin->data; 1149 int ierr; 1150 *newmat = 0; 1151 1152 if (!oldmat->assembled) SETERR(1,"Cannot copy unassembled matrix"); 1153 PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1154 PLogObjectCreate(mat); 1155 mat->data = (void *) (aij = NEW(Mat_MPIAIJ)); CHKPTR(aij); 1156 mat->ops = &MatOps; 1157 mat->destroy = MatDestroy_MPIAIJ; 1158 mat->view = MatView_MPIAIJ; 1159 mat->factor = matin->factor; 1160 mat->row = 0; 1161 mat->col = 0; 1162 1163 aij->m = oldmat->m; 1164 aij->n = oldmat->n; 1165 aij->M = oldmat->M; 1166 aij->N = oldmat->N; 1167 1168 aij->assembled = 1; 1169 aij->rstart = oldmat->rstart; 1170 aij->rend = oldmat->rend; 1171 aij->cstart = oldmat->cstart; 1172 aij->cend = oldmat->cend; 1173 aij->numtids = oldmat->numtids; 1174 aij->mytid = oldmat->mytid; 1175 aij->insertmode = NotSetValues; 1176 1177 aij->rowners = (int *) MALLOC( (aij->numtids+1)*sizeof(int) ); 1178 CHKPTR(aij->rowners); 1179 MEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int)); 1180 aij->stash.nmax = 0; 1181 aij->stash.n = 0; 1182 aij->stash.array= 0; 1183 aij->colmap = 0; 1184 aij->garray = 0; 1185 mat->comm = matin->comm; 1186 1187 ierr = VecCreate(oldmat->lvec,&aij->lvec); CHKERR(ierr); 1188 PLogObjectParent(mat,aij->lvec); 1189 ierr = VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERR(ierr); 1190 PLogObjectParent(mat,aij->Mvctx); 1191 ierr = MatCopy(oldmat->A,&aij->A); CHKERR(ierr); 1192 PLogObjectParent(mat,aij->A); 1193 ierr = MatCopy(oldmat->B,&aij->B); CHKERR(ierr); 1194 PLogObjectParent(mat,aij->B); 1195 *newmat = mat; 1196 return 0; 1197 } 1198