#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.14 1995/03/23 22:59:30 curfman Exp curfman $";
#endif

#include "mpiaij.h"
#include "vec/vecimpl.h"
#include "inline/spops.h"

/* Growth increment (in entries) for the off-processor value stash. */
#define CHUNCKSIZE 100
/*
      This is a simple minded stash. Do a linear search to determine if
   in stash, if not add to end.
*/
/*
   StashValues - Caches values destined for matrix rows owned by other
   processors until assembly time.

   For each (row,idxn[i]) pair a linear scan of the current stash is done:
   a match is combined in place (added or overwritten according to addv),
   otherwise the entry is appended, growing the stash by CHUNCKSIZE entries
   when full.  Note this is O(stash->n) per inserted value.
*/
static int StashValues(Stash *stash,int row,int n, int *idxn,
                       Scalar *values,InsertMode addv)
{
  int    i,j,N = stash->n,found,*n_idx, *n_idy;
  Scalar val,*n_array;

  for ( i=0; i<n; i++ ) {
    found = 0;
    val = *values++;
    for ( j=0; j<N; j++ ) {
      if ( stash->idx[j] == row && stash->idy[j] == idxn[i]) {
        /* found a match */
        if (addv == AddValues) stash->array[j] += val;
        else stash->array[j] = val;
        found = 1;
        break;
      }
    }
    if (!found) { /* not found so add to end */
      if ( stash->n == stash->nmax ) {
        /* allocate a larger stash; the Scalar values and both int index
           arrays share one malloc block (array first, then idx, then idy),
           so only stash->array is ever freed */
        n_array = (Scalar *) MALLOC( (stash->nmax + CHUNCKSIZE)*(
                             2*sizeof(int) + sizeof(Scalar)));
        CHKPTR(n_array);
        n_idx = (int *) (n_array + stash->nmax + CHUNCKSIZE);
        n_idy = (int *) (n_idx + stash->nmax + CHUNCKSIZE);
        MEMCPY(n_array,stash->array,stash->nmax*sizeof(Scalar));
        MEMCPY(n_idx,stash->idx,stash->nmax*sizeof(int));
        MEMCPY(n_idy,stash->idy,stash->nmax*sizeof(int));
        if (stash->array) FREE(stash->array);
        stash->array = n_array; stash->idx = n_idx; stash->idy = n_idy;
        stash->nmax += CHUNCKSIZE;
      }
      stash->array[stash->n] = val;
      stash->idx[stash->n]   = row;
      stash->idy[stash->n++] = idxn[i];
    }
  }
  return 0;
}

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non scalable way since the
   length of colmap equals the global matrix length.
59 */ 60 static int CreateColmap(Mat mat) 61 { 62 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 63 Mat_AIJ *B = (Mat_AIJ*) aij->B->data; 64 int n = B->n,i; 65 aij->colmap = (int *) MALLOC( aij->N*sizeof(int) ); CHKPTR(aij->colmap); 66 MEMSET(aij->colmap,0,aij->N*sizeof(int)); 67 for ( i=0; i<n; i++ ) { 68 aij->colmap[aij->garray[i]] = i+1; 69 } 70 return 0; 71 } 72 73 static int MatInsertValues_MPIAIJ(Mat mat,int m,int *idxm,int n, 74 int *idxn,Scalar *v,InsertMode addv) 75 { 76 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 77 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 78 int cstart = aij->cstart, cend = aij->cend,row,col; 79 80 if (aij->insertmode != NotSetValues && aij->insertmode != addv) { 81 SETERR(1,"You cannot mix inserts and adds"); 82 } 83 aij->insertmode = addv; 84 for ( i=0; i<m; i++ ) { 85 if (idxm[i] < 0) SETERR(1,"Negative row index"); 86 if (idxm[i] >= aij->M) SETERR(1,"Row index too large"); 87 if (idxm[i] >= rstart && idxm[i] < rend) { 88 row = idxm[i] - rstart; 89 for ( j=0; j<n; j++ ) { 90 if (idxn[j] < 0) SETERR(1,"Negative column index"); 91 if (idxn[j] >= aij->N) SETERR(1,"Column index too large"); 92 if (idxn[j] >= cstart && idxn[j] < cend){ 93 col = idxn[j] - cstart; 94 ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr); 95 } 96 else { 97 if (aij->assembled) { 98 if (!aij->colmap) {ierr = CreateColmap(mat); CHKERR(ierr);} 99 col = aij->colmap[idxn[j]] - 1; 100 if (col < 0) { 101 SETERR(1,"Cannot insert new off diagonal block nonzero in\ 102 already\ 103 assembled matrix. 
Contact petsc-maint@mcs.anl.gov\ 104 if your need this feature"); 105 } 106 } 107 else col = idxn[j]; 108 ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr); 109 } 110 } 111 } 112 else { 113 ierr = StashValues(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERR(ierr); 114 } 115 } 116 return 0; 117 } 118 119 /* 120 the assembly code is alot like the code for vectors, we should 121 sometime derive a single assembly code that can be used for 122 either case. 123 */ 124 125 static int MatBeginAssemble_MPIAIJ(Mat mat) 126 { 127 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 128 MPI_Comm comm = mat->comm; 129 int numtids = aij->numtids, *owners = aij->rowners; 130 int mytid = aij->mytid; 131 MPI_Request *send_waits,*recv_waits; 132 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 133 int tag = 50, *owner,*starts,count; 134 InsertMode addv; 135 Scalar *rvalues,*svalues; 136 137 /* make sure all processors are either in INSERTMODE or ADDMODE */ 138 MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT, 139 MPI_BOR,comm); 140 if (addv == (AddValues|InsertValues)) { 141 SETERR(1,"Some processors have inserted while others have added"); 142 } 143 aij->insertmode = addv; /* in case this processor had no cache */ 144 145 /* first count number of contributors to each processor */ 146 nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs); 147 MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids; 148 owner = (int *) MALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTR(owner); 149 for ( i=0; i<aij->stash.n; i++ ) { 150 idx = aij->stash.idx[i]; 151 for ( j=0; j<numtids; j++ ) { 152 if (idx >= owners[j] && idx < owners[j+1]) { 153 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 154 } 155 } 156 } 157 nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 158 159 /* inform other processors of number of messages and max length*/ 160 work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work); 161 MPI_Allreduce((void *) procs,(void *) 
work,numtids,MPI_INT,MPI_SUM,comm); 162 nreceives = work[mytid]; 163 MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm); 164 nmax = work[mytid]; 165 FREE(work); 166 167 /* post receives: 168 1) each message will consist of ordered pairs 169 (global index,value) we store the global index as a double 170 to simplify the message passing. 171 2) since we don't know how long each individual message is we 172 allocate the largest needed buffer for each receive. Potentially 173 this is a lot of wasted space. 174 175 176 This could be done better. 177 */ 178 rvalues = (Scalar *) MALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 179 CHKPTR(rvalues); 180 recv_waits = (MPI_Request *) MALLOC((nreceives+1)*sizeof(MPI_Request)); 181 CHKPTR(recv_waits); 182 for ( i=0; i<nreceives; i++ ) { 183 MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPI_SCALAR,MPI_ANY_SOURCE,tag, 184 comm,recv_waits+i); 185 } 186 187 /* do sends: 188 1) starts[i] gives the starting index in svalues for stuff going to 189 the ith processor 190 */ 191 svalues = (Scalar *) MALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) ); 192 CHKPTR(svalues); 193 send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request)); 194 CHKPTR(send_waits); 195 starts = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(starts); 196 starts[0] = 0; 197 for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 198 for ( i=0; i<aij->stash.n; i++ ) { 199 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 200 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 201 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 202 } 203 FREE(owner); 204 starts[0] = 0; 205 for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 206 count = 0; 207 for ( i=0; i<numtids; i++ ) { 208 if (procs[i]) { 209 MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPI_SCALAR,i,tag, 210 comm,send_waits+count++); 211 } 212 } 213 FREE(starts); FREE(nprocs); 214 215 /* Free cache space */ 216 aij->stash.nmax = 
aij->stash.n = 0; 217 if (aij->stash.array){ FREE(aij->stash.array); aij->stash.array = 0;} 218 219 aij->svalues = svalues; aij->rvalues = rvalues; 220 aij->nsends = nsends; aij->nrecvs = nreceives; 221 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 222 aij->rmax = nmax; 223 224 return 0; 225 } 226 extern int MatSetUpMultiply_MPIAIJ(Mat); 227 228 static int MatEndAssemble_MPIAIJ(Mat mat) 229 { 230 int ierr; 231 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 232 233 MPI_Status *send_status,recv_status; 234 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n; 235 int row,col; 236 Scalar *values,val; 237 InsertMode addv = aij->insertmode; 238 239 /* wait on receives */ 240 while (count) { 241 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 242 /* unpack receives into our local space */ 243 values = aij->rvalues + 3*imdex*aij->rmax; 244 MPI_Get_count(&recv_status,MPI_SCALAR,&n); 245 n = n/3; 246 for ( i=0; i<n; i++ ) { 247 row = (int) PETSCREAL(values[3*i]) - aij->rstart; 248 col = (int) PETSCREAL(values[3*i+1]); 249 val = values[3*i+2]; 250 if (col >= aij->cstart && col < aij->cend) { 251 col -= aij->cstart; 252 MatSetValues(aij->A,1,&row,1,&col,&val,addv); 253 } 254 else { 255 if (aij->assembled) { 256 if (!aij->colmap) {ierr = CreateColmap(mat); CHKERR(ierr);} 257 col = aij->colmap[col] - 1; 258 if (col < 0) { 259 SETERR(1,"Cannot insert new off diagonal block nonzero in\ 260 already\ 261 assembled matrix. 
Contact petsc-maint@mcs.anl.gov\ 262 if your need this feature"); 263 } 264 } 265 MatSetValues(aij->B,1,&row,1,&col,&val,addv); 266 } 267 } 268 count--; 269 } 270 FREE(aij->recv_waits); FREE(aij->rvalues); 271 272 /* wait on sends */ 273 if (aij->nsends) { 274 send_status = (MPI_Status *) MALLOC( aij->nsends*sizeof(MPI_Status) ); 275 CHKPTR(send_status); 276 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 277 FREE(send_status); 278 } 279 FREE(aij->send_waits); FREE(aij->svalues); 280 281 aij->insertmode = NotSetValues; 282 ierr = MatBeginAssembly(aij->A); CHKERR(ierr); 283 ierr = MatEndAssembly(aij->A); CHKERR(ierr); 284 285 if (!aij->assembled) { 286 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERR(ierr); 287 } 288 ierr = MatBeginAssembly(aij->B); CHKERR(ierr); 289 ierr = MatEndAssembly(aij->B); CHKERR(ierr); 290 291 aij->assembled = 1; 292 return 0; 293 } 294 295 static int MatZero_MPIAIJ(Mat A) 296 { 297 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 298 299 MatZeroEntries(l->A); MatZeroEntries(l->B); 300 return 0; 301 } 302 303 /* again this uses the same basic stratagy as in the assembly and 304 scatter create routines, we should try to do it systemamatically 305 if we can figure out the proper level of generality. */ 306 307 /* the code does not do the diagonal entries correctly unless the 308 matrix is square and the column and row owerships are identical. 309 This is a BUG. The only way to fix it seems to be to access 310 aij->A and aij->B directly and not through the MatZeroRows() 311 routine. 
*/
/*
   MatZeroRows_MPIAIJ - Zeros the global rows listed in is.  Each
   processor forwards the row indices it holds to the owning processors,
   then every processor zeros its received (now local) rows in both
   sequential blocks.  diag, if nonzero, is placed on the diagonal of the
   A block (see BUG note above about non-square matrices).
*/
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid;
  int            *rvalues,tag = 67,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  if (!l->assembled) SETERR(1,"MatZeroRows_MPIAIJ: must assemble matrix first");
  ierr = ISGetLocalSize(is,&N); CHKERR(ierr);
  ierr = ISGetIndices(is,&rows); CHKERR(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs);
  MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
  owner = (int *) MALLOC((N+1)*sizeof(int)); CHKPTR(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<numtids; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERR(1,"Imdex out of range");
  }
  nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work);
  MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
  nrecvs = work[mytid];
  MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
  nmax = work[mytid];
  FREE(work);

  /* post receives:  (largest possible buffer per message; see note) */
  rvalues = (int *) MALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTR(rvalues);
  recv_waits = (MPI_Request *) MALLOC((nrecvs+1)*sizeof(MPI_Request));
  CHKPTR(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) MALLOC( (N+1)*sizeof(int) ); CHKPTR(svalues);
  send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTR(send_waits);
  starts = (int *) MALLOC( (numtids+1)*sizeof(int) ); CHKPTR(starts);
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* starts[] is consumed by the packing loop, so rebuild it afterwards */
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<numtids; i++ ) {
    if (procs[i]) {
      MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag,
                comm,send_waits+count++);
    }
  }
  FREE(starts);

  base = owners[mytid];

  /* wait on receives; lens and source share one allocation */
  lens = (int *) MALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTR(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  FREE(recv_waits);

  /* move the data into the send scatter; convert to local row numbers */
  lrows = (int *) MALLOC( slen*sizeof(int) ); CHKPTR(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  FREE(rvalues); FREE(lens);
  FREE(owner); FREE(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSequential(slen,lrows,&istmp); CHKERR(ierr); FREE(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERR(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERR(ierr);
  ierr =
         ISDestroy(istmp); CHKERR(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) MALLOC( nsends*sizeof(MPI_Status) );
    CHKPTR(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    FREE(send_status);
  }
  FREE(send_waits); FREE(svalues);


  return 0;
}

/*
   MatMult_MPIAIJ - yy = mat*xx.  The scatter of the off-processor part
   of xx into lvec is overlapped with the local (diagonal block) multiply.
*/
static int MatMult_MPIAIJ(Mat aijin,Vec xx,Vec yy)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
  int        ierr;
  if (!aij->assembled) SETERR(1,"MatMult_MPIAIJ: must assemble matrix first");
  ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
  CHKERR(ierr);
  ierr = MatMult(aij->A,xx,yy); CHKERR(ierr);
  ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
  CHKERR(ierr);
  ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERR(ierr);
  return 0;
}

/* MatMultAdd_MPIAIJ - zz = yy + mat*xx, same overlap scheme as MatMult. */
static int MatMultAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
  int        ierr;
  if (!aij->assembled) SETERR(1,"MatMult_MPIAIJ: must assemble matrix first");
  ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
  CHKERR(ierr);
  ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERR(ierr);
  ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx);
  CHKERR(ierr);
  ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERR(ierr);
  return 0;
}

/* MatMultTrans_MPIAIJ - yy = mat^T * xx, using a reverse scatter to add
   the off-diagonal contributions back to their owners. */
static int MatMultTrans_MPIAIJ(Mat aijin,Vec xx,Vec yy)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
  int        ierr;

  if (!aij->assembled)
    SETERR(1,"MatMulTrans_MPIAIJ: must assemble matrix first");
  /* do nondiagonal part */
  ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(aij->lvec,0,yy,0,AddValues,
                         ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr);
  /* do local part */
  ierr = MatMultTrans(aij->A,xx,yy); CHKERR(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(aij->lvec,0,yy,0,AddValues,ScatterAll|ScatterReverse,
                       aij->Mvctx); CHKERR(ierr);
  return 0;
}

/* MatMultTransAdd_MPIAIJ - zz = yy + mat^T * xx. */
static int MatMultTransAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
  int        ierr;

  if (!aij->assembled)
    SETERR(1,"MatMulTransAdd_MPIAIJ: must assemble matrix first");
  /* do nondiagonal part */
  ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(aij->lvec,0,zz,0,AddValues,
                         ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr);
  /* do local part */
  ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERR(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true.
*/
  ierr = VecScatterEnd(aij->lvec,0,zz,0,AddValues,ScatterAll|ScatterReverse,
                       aij->Mvctx); CHKERR(ierr);
  return 0;
}

/*
   This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
static int MatGetDiag_MPIAIJ(Mat Ain,Vec v)
{
  Mat_MPIAIJ *A = (Mat_MPIAIJ *) Ain->data;
  if (!A->assembled) SETERR(1,"MatGetDiag_MPIAIJ: must assemble matrix first");
  return MatGetDiagonal(A->A,v);
}

/* MatDestroy_MPIAIJ - Frees both sequential blocks, the ownership table,
   the column map, the local work vector and its scatter context. */
static int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows %d Cols %d",aij->M,aij->N);
#endif
  FREE(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERR(ierr);
  ierr = MatDestroy(aij->B); CHKERR(ierr);
  if (aij->colmap) FREE(aij->colmap);
  if (aij->garray) FREE(aij->garray);
  if (aij->lvec) VecDestroy(aij->lvec);
  if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx);
  FREE(aij);
  PLogObjectDestroy(mat);
  PETSCHEADERDESTROY(mat);
  return 0;
}

/* MatView_MPIAIJ - Prints each processor's ownership ranges and the two
   sequential blocks, sequentialized over the communicator. */
static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  if (!aij->assembled) SETERR(1,"MatView_MPIAIJ: must assemble matrix first");
  MPE_Seq_begin(mat->comm,1);
  ViewerPrintf(viewer,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
               aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
               aij->cend);
  ierr = MatView(aij->A,viewer); CHKERR(ierr);
  ierr = MatView(aij->B,viewer); CHKERR(ierr);
  ViewerFlush(viewer);
  MPE_Seq_end(mat->comm,1);
  return 0;
}

extern int MatMarkDiag_AIJ(Mat_AIJ *);
/*
    This has to provide several versions.

     1) per sequential
     2) a) use only local smoothing updating outer values only once.
        b) local smoothing updating outer values each inner iteration
     3) color updating out values between colors.
*/
static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,int flag,double shift,
                           int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_AIJ    *A = (Mat_AIJ *) AA->data, *B = (Mat_AIJ *)BB->data;
  Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i;
  Vec        tt;

  if (!mat->assembled) SETERR(1,"MatRelax_MPIAIJ: must assemble matrix first");

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x -1; /* shift by one for index start of 1 */
  ls--;
  if (!A->diag) {if ((ierr = MatMarkDiag_AIJ(A))) return ierr;}
  diag = A->diag;
  if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
    SETERR(1,"That option not yet support for parallel AIJ matrices");
  }
  if (flag & SOR_EISENSTAT) {
    /* Let A = L + U + D; where L is lower triangular,
       U is upper triangular, E is diagonal; This routine applies

               (L + E)^{-1} A (U + E)^{-1}

       to a vector efficiently using Eisenstat's trick. This is for
       the case of SSOR preconditioner, so E is D/omega where omega
       is the relaxation factor.
*/
    ierr = VecCreate(xx,&tt); CHKERR(ierr);
    VecGetArray(tt,&t);
    scale = (2.0/omega) - 1.0;
    /* x = (E + U)^{-1} b  (backward solve, pipelined up through the rows) */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                            mat->Mvctx); CHKERR(ierr);
    for ( i=m-1; i>-1; i-- ) {
      /* strictly upper triangular part of row i of A (1-based CSR) */
      n    = A->i[i+1] - diag[i] - 1;
      idx  = A->j + diag[i];
      v    = A->a + diag[i];
      sum  = b[i];
      SPARSEDENSEMDOT(sum,xs,v,idx,n);
      d    = shift + A->a[diag[i]-1];
      /* whole row i of the off-diagonal block B against lvec */
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] - 1;
      v    = B->a + B->i[i] - 1;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      x[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                          mat->Mvctx); CHKERR(ierr);

    /* t = b - (2*E - D)x */
    v = A->a;
    for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; }

    /* t = (E + L)^{-1}t  (forward solve, pipelined down through the rows) */
    ts = t - 1; /* shifted by one for index start of a or mat->j*/
    diag = A->diag; /* diag was advanced by the loop above; reset it */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(tt,0,mat->lvec,0,InsertValues,PipelineDown,
                            mat->Mvctx); CHKERR(ierr);
    for ( i=0; i<m; i++ ) {
      /* strictly lower triangular part of row i of A */
      n    = diag[i] - A->i[i];
      idx  = A->j + A->i[i] - 1;
      v    = A->a + A->i[i] - 1;
      sum  = t[i];
      SPARSEDENSEMDOT(sum,ts,v,idx,n);
      d    = shift + A->a[diag[i]-1];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] - 1;
      v    = B->a + B->i[i] - 1;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      t[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(tt,0,mat->lvec,0,InsertValues,PipelineDown,
                          mat->Mvctx); CHKERR(ierr);
    /* x = x + t */
    for ( i=0; i<m; i++ ) { x[i] += t[i]; }
    VecDestroy(tt);
    return 0;
  }


  if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec); VecSet(&zero,xx);
    }
    else {
      /* bring the ghost values of xx up to date before sweeping */
      ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
      CHKERR(ierr);
      ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
      CHKERR(ierr);
    }
    while (its--) {
      /* go down through the rows */
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                            mat->Mvctx); CHKERR(ierr);
      /* come up through the rows */
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                            mat->Mvctx); CHKERR(ierr);
    }
  }
  else if (flag & SOR_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero guess the first sweep only needs the lower triangle */
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=0; i<m; i++ ) {
        n    = diag[i] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                            mat->Mvctx); CHKERR(ierr);
      its--;
    }
    while (its--) {
      ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
      CHKERR(ierr);
      ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
      CHKERR(ierr);
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                            mat->Mvctx); CHKERR(ierr);
    }
  }
  else if (flag & SOR_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero guess the first sweep only needs the upper triangle */
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - diag[i] - 1;
        idx  = A->j + diag[i];
        v    = A->a + diag[i];
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                            mat->Mvctx); CHKERR(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterDown,
                             mat->Mvctx); CHKERR(ierr);
      ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterDown,
                           mat->Mvctx); CHKERR(ierr);
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                            mat->Mvctx); CHKERR(ierr);
    }
  }
  else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    /* local variants: ghost values scattered once, then purely local sweeps */
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
    CHKERR(ierr);
    ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
    CHKERR(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
    CHKERR(ierr);
    ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
    CHKERR(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,
                           mat->Mvctx); CHKERR(ierr);
    ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,
                         mat->Mvctx); CHKERR(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1.
                - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  return 0;
}
/* MatInsOpt_MPIAIJ - Forwards supported options to both sequential blocks;
   column-oriented insertion is rejected. */
static int MatInsOpt_MPIAIJ(Mat aijin,int op)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;

  if (op == NO_NEW_NONZERO_LOCATIONS) {
    MatSetOption(aij->A,op);
    MatSetOption(aij->B,op);
  }
  else if (op == YES_NEW_NONZERO_LOCATIONS) {
    MatSetOption(aij->A,op);
    MatSetOption(aij->B,op);
  }
  else if (op == COLUMN_ORIENTED) SETERR(1,"Column oriented not supported");
  return 0;
}

/* MatSize_MPIAIJ - Returns the global dimensions M,N. */
static int MatSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

/* MatLocalSize_MPIAIJ - Returns this processor's local dimensions m,n. */
static int MatLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->m; *n = mat->n;
  return 0;
}

/* MatRange_MPIAIJ - Returns this processor's row ownership range. */
static int MatRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

static int MatCopy_MPIAIJ(Mat,Mat *);
extern int MatConvert_MPIAIJ(Mat,MATTYPE,Mat *);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatInsertValues_MPIAIJ,
       0, 0,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,0,0,
       0,0,
       MatRelax_MPIAIJ,
       0,
       0,0,0,
       MatCopy_MPIAIJ,
       MatGetDiag_MPIAIJ,0,0,
       MatBeginAssemble_MPIAIJ,MatEndAssemble_MPIAIJ,
       0,
       MatInsOpt_MPIAIJ,MatZero_MPIAIJ,MatZeroRows_MPIAIJ,0,
       0,0,0,0,
       MatSize_MPIAIJ,MatLocalSize_MPIAIJ,MatRange_MPIAIJ,
       0,0,
       0,MatConvert_MPIAIJ };

/*@

      MatCreateMPIAIJ - Creates a sparse parallel matrix
  in AIJ format.

  Input Parameters:
.   comm - MPI communicator
.   m,n - number of local rows and columns (or -1 to have calculated)
.   M,N - global rows and columns (or -1 to have calculated)
.   d_nz - total number nonzeros in diagonal portion of matrix
.   d_nzz - number of nonzeros per row in diagonal portion of matrix or null
.           You must leave room for the diagonal entry even if it is zero.
.   o_nz - total number nonzeros in off-diagonal portion of matrix
.   o_nzz - number of nonzeros per row in off-diagonal portion of matrix
.           or null. You must have at least one nonzero per row.

  Output parameters:
.  newmat - the matrix

  Keywords: matrix, aij, compressed row, sparse, parallel
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *aij;
  int        ierr, i,sum[2],work[2];
  *newmat = 0;
  PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(mat);
  mat->data    = (void *) (aij = NEW(Mat_MPIAIJ)); CHKPTR(aij);
  mat->ops     = &MatOps;
  mat->destroy = MatDestroy_MPIAIJ;
  mat->view    = MatView_MPIAIJ;
  mat->factor  = 0;
  mat->row     = 0;
  mat->col     = 0;

  mat->comm    = comm;
  aij->insertmode = NotSetValues;
  MPI_Comm_rank(comm,&aij->mytid);
  MPI_Comm_size(comm,&aij->numtids);

  /* compute any of m,n,M,N the caller left as -1 */
  if (M == -1 || N == -1) {
    work[0] = m; work[1] = n;
    MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm );
    if (M == -1) M = sum[0];
    if (N == -1) N = sum[1];
  }
  if (m == -1) {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);}
  if (n == -1) {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);}
  aij->m = m;
  aij->n = n;
  aij->N = N;
  aij->M = M;

  /* build local table of row and column ownerships; rowners and cowners
     share one allocation of 2*(numtids+2) ints */
  aij->rowners = (int *) MALLOC(2*(aij->numtids+2)*sizeof(int));
  CHKPTR(aij->rowners);
  aij->cowners = aij->rowners + aij->numtids + 1;
  MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm);
  aij->rowners[0] = 0;
  /* prefix-sum the local sizes into ownership offsets */
  for ( i=2; i<=aij->numtids; i++ ) {
    aij->rowners[i] += aij->rowners[i-1];
  }
  aij->rstart = aij->rowners[aij->mytid];
  aij->rend   = aij->rowners[aij->mytid+1];
  MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm);
  aij->cowners[0] = 0;
  for ( i=2; i<=aij->numtids; i++ ) {
    aij->cowners[i] += aij->cowners[i-1];
  }
  aij->cstart = aij->cowners[aij->mytid];
  aij->cend   = aij->cowners[aij->mytid+1];


  /* A is the local diagonal block (m x n); B holds the off-diagonal
     entries and is created m x N (columns are compacted at assembly) */
  ierr = MatCreateSequentialAIJ(m,n,d_nz,d_nnz,&aij->A); CHKERR(ierr);
  PLogObjectParent(mat,aij->A);
  ierr = MatCreateSequentialAIJ(m,N,o_nz,o_nnz,&aij->B); CHKERR(ierr);
  PLogObjectParent(mat,aij->B);

  /* build cache for off array entries formed */
  aij->stash.nmax  = CHUNCKSIZE; /* completely arbitrary number */
  aij->stash.n     = 0;
  /* one allocation holds the Scalar values followed by both index arrays,
     matching the layout StashValues() assumes when it regrows the stash */
  aij->stash.array = (Scalar *) MALLOC( aij->stash.nmax*(2*sizeof(int) +
                            sizeof(Scalar))); CHKPTR(aij->stash.array);
  aij->stash.idx = (int *) (aij->stash.array + aij->stash.nmax);
  aij->stash.idy = (int *) (aij->stash.idx + aij->stash.nmax);
  aij->colmap    = 0;
  aij->garray    = 0;

  /* stuff used for matrix vector multiply */
  aij->lvec      = 0;
  aij->Mvctx     = 0;
  aij->assembled = 0;

  *newmat = mat;
  return 0;
}

static int MatCopy_MPIAIJ(Mat matin,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *aij,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr;
  *newmat = 0;

  if (!oldmat->assembled) SETERR(1,"Cannot copy unassembled matrix");
  PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data    = (void *) (aij = NEW(Mat_MPIAIJ)); CHKPTR(aij);
  mat->ops     = &MatOps;
  mat->destroy = MatDestroy_MPIAIJ;
  mat->view    = MatView_MPIAIJ;
  mat->factor  = matin->factor;
  mat->row     = 0;
  mat->col     = 0;

  aij->m = oldmat->m;
  aij->n = oldmat->n;
  aij->M = oldmat->M;
  aij->N = oldmat->N;

  aij->assembled = 1;
  aij->rstart    = oldmat->rstart;
  aij->rend      = oldmat->rend;
  aij->cstart
= oldmat->cstart; 1072 aij->cend = oldmat->cend; 1073 aij->numtids = oldmat->numtids; 1074 aij->mytid = oldmat->mytid; 1075 aij->insertmode = NotSetValues; 1076 1077 aij->rowners = (int *) MALLOC( (aij->numtids+1)*sizeof(int) ); 1078 CHKPTR(aij->rowners); 1079 MEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int)); 1080 aij->stash.nmax = 0; 1081 aij->stash.n = 0; 1082 aij->stash.array= 0; 1083 aij->colmap = 0; 1084 aij->garray = 0; 1085 mat->comm = matin->comm; 1086 1087 ierr = VecCreate(oldmat->lvec,&aij->lvec); CHKERR(ierr); 1088 PLogObjectParent(mat,aij->lvec); 1089 ierr = VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERR(ierr); 1090 PLogObjectParent(mat,aij->Mvctx); 1091 ierr = MatCopy(oldmat->A,&aij->A); CHKERR(ierr); 1092 PLogObjectParent(mat,aij->A); 1093 ierr = MatCopy(oldmat->B,&aij->B); CHKERR(ierr); 1094 PLogObjectParent(mat,aij->B); 1095 *newmat = mat; 1096 return 0; 1097 } 1098