1 #ifndef lint 2 static char vcid[] = "$Id: mpiaij.c,v 1.9 1995/03/17 00:29:15 bsmith Exp bsmith $"; 3 #endif 4 5 #include "mpiaij.h" 6 #include "vec/vecimpl.h" 7 #include "inline/spops.h" 8 9 #define CHUNCKSIZE 100 10 /* 11 This is a simple minded stash. Do a linear search to determine if 12 in stash, if not add to end. 13 */ 14 static int StashValues(Stash *stash,int row,int n, int *idxn, 15 Scalar *values,InsertMode addv) 16 { 17 int i,j,N = stash->n,found,*n_idx, *n_idy; 18 Scalar val,*n_array; 19 20 for ( i=0; i<n; i++ ) { 21 found = 0; 22 val = *values++; 23 for ( j=0; j<N; j++ ) { 24 if ( stash->idx[j] == row && stash->idy[j] == idxn[i]) { 25 /* found a match */ 26 if (addv == AddValues) stash->array[j] += val; 27 else stash->array[j] = val; 28 found = 1; 29 break; 30 } 31 } 32 if (!found) { /* not found so add to end */ 33 if ( stash->n == stash->nmax ) { 34 /* allocate a larger stash */ 35 n_array = (Scalar *) MALLOC( (stash->nmax + CHUNCKSIZE)*( 36 2*sizeof(int) + sizeof(Scalar))); 37 CHKPTR(n_array); 38 n_idx = (int *) (n_array + stash->nmax + CHUNCKSIZE); 39 n_idy = (int *) (n_idx + stash->nmax + CHUNCKSIZE); 40 MEMCPY(n_array,stash->array,stash->nmax*sizeof(Scalar)); 41 MEMCPY(n_idx,stash->idx,stash->nmax*sizeof(int)); 42 MEMCPY(n_idy,stash->idy,stash->nmax*sizeof(int)); 43 if (stash->array) FREE(stash->array); 44 stash->array = n_array; stash->idx = n_idx; stash->idy = n_idy; 45 stash->nmax += CHUNCKSIZE; 46 } 47 stash->array[stash->n] = val; 48 stash->idx[stash->n] = row; 49 stash->idy[stash->n++] = idxn[i]; 50 } 51 } 52 return 0; 53 } 54 55 /* local utility routine that creates a mapping from the global column 56 number to the local number in the off-diagonal part of the local 57 storage of the matrix. This is done in a non scable way since the 58 length of colmap equals the global matrix length. 
59 */ 60 static int CreateColmap(Mat mat) 61 { 62 Matimpiaij *aij = (Matimpiaij *) mat->data; 63 Matiaij *B = (Matiaij*) aij->B->data; 64 int n = B->n,i; 65 aij->colmap = (int *) MALLOC( aij->N*sizeof(int) ); CHKPTR(aij->colmap); 66 MEMSET(aij->colmap,0,aij->N*sizeof(int)); 67 for ( i=0; i<n; i++ ) { 68 aij->colmap[aij->garray[i]] = i+1; 69 } 70 return 0; 71 } 72 73 static int MatiAIJInsertValues(Mat mat,int m,int *idxm,int n, 74 int *idxn,Scalar *v,InsertMode addv) 75 { 76 Matimpiaij *aij = (Matimpiaij *) mat->data; 77 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 78 int cstart = aij->cstart, cend = aij->cend,row,col; 79 80 if (aij->insertmode != NotSetValues && aij->insertmode != addv) { 81 SETERR(1,"You cannot mix inserts and adds"); 82 } 83 aij->insertmode = addv; 84 for ( i=0; i<m; i++ ) { 85 if (idxm[i] < 0) SETERR(1,"Negative row index"); 86 if (idxm[i] >= aij->M) SETERR(1,"Row index too large"); 87 if (idxm[i] >= rstart && idxm[i] < rend) { 88 row = idxm[i] - rstart; 89 for ( j=0; j<n; j++ ) { 90 if (idxn[j] < 0) SETERR(1,"Negative column index"); 91 if (idxn[j] >= aij->N) SETERR(1,"Column index too large"); 92 if (idxn[j] >= cstart && idxn[j] < cend){ 93 col = idxn[j] - cstart; 94 ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr); 95 } 96 else { 97 if (aij->assembled) { 98 if (!aij->colmap) {ierr = CreateColmap(mat); CHKERR(ierr);} 99 col = aij->colmap[idxn[j]] - 1; 100 if (col < 0) { 101 SETERR(1,"Cannot insert new off diagonal block nonzero in\ 102 already\ 103 assembled matrix. 
Contact petsc-maint@mcs.anl.gov\ 104 if your need this feature"); 105 } 106 } 107 else col = idxn[j]; 108 ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr); 109 } 110 } 111 } 112 else { 113 ierr = StashValues(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERR(ierr); 114 } 115 } 116 return 0; 117 } 118 119 /* 120 the assembly code is alot like the code for vectors, we should 121 sometime derive a single assembly code that can be used for 122 either case. 123 */ 124 125 static int MatiAIJBeginAssemble(Mat mat) 126 { 127 Matimpiaij *aij = (Matimpiaij *) mat->data; 128 MPI_Comm comm = mat->comm; 129 int numtids = aij->numtids, *owners = aij->rowners; 130 int mytid = aij->mytid; 131 MPI_Request *send_waits,*recv_waits; 132 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 133 int tag = 50, *owner,*starts,count; 134 InsertMode addv; 135 Scalar *rvalues,*svalues; 136 137 /* make sure all processors are either in INSERTMODE or ADDMODE */ 138 MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT, 139 MPI_BOR,comm); 140 if (addv == (AddValues|InsertValues)) { 141 SETERR(1,"Some processors have inserted while others have added"); 142 } 143 aij->insertmode = addv; /* in case this processor had no cache */ 144 145 /* first count number of contributors to each processor */ 146 nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs); 147 MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids; 148 owner = (int *) MALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTR(owner); 149 for ( i=0; i<aij->stash.n; i++ ) { 150 idx = aij->stash.idx[i]; 151 for ( j=0; j<numtids; j++ ) { 152 if (idx >= owners[j] && idx < owners[j+1]) { 153 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 154 } 155 } 156 } 157 nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 158 159 /* inform other processors of number of messages and max length*/ 160 work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work); 161 MPI_Allreduce((void *) procs,(void *) 
work,numtids,MPI_INT,MPI_SUM,comm); 162 nreceives = work[mytid]; 163 MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm); 164 nmax = work[mytid]; 165 FREE(work); 166 167 /* post receives: 168 1) each message will consist of ordered pairs 169 (global index,value) we store the global index as a double 170 to simplify the message passing. 171 2) since we don't know how long each individual message is we 172 allocate the largest needed buffer for each receive. Potentially 173 this is a lot of wasted space. 174 175 176 This could be done better. 177 */ 178 rvalues = (Scalar *) MALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 179 CHKPTR(rvalues); 180 recv_waits = (MPI_Request *) MALLOC((nreceives+1)*sizeof(MPI_Request)); 181 CHKPTR(recv_waits); 182 for ( i=0; i<nreceives; i++ ) { 183 MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPI_SCALAR,MPI_ANY_SOURCE,tag, 184 comm,recv_waits+i); 185 } 186 187 /* do sends: 188 1) starts[i] gives the starting index in svalues for stuff going to 189 the ith processor 190 */ 191 svalues = (Scalar *) MALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) ); 192 CHKPTR(svalues); 193 send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request)); 194 CHKPTR(send_waits); 195 starts = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(starts); 196 starts[0] = 0; 197 for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 198 for ( i=0; i<aij->stash.n; i++ ) { 199 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 200 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 201 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 202 } 203 FREE(owner); 204 starts[0] = 0; 205 for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 206 count = 0; 207 for ( i=0; i<numtids; i++ ) { 208 if (procs[i]) { 209 MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPI_SCALAR,i,tag, 210 comm,send_waits+count++); 211 } 212 } 213 FREE(starts); FREE(nprocs); 214 215 /* Free cache space */ 216 aij->stash.nmax = 
aij->stash.n = 0; 217 if (aij->stash.array){ FREE(aij->stash.array); aij->stash.array = 0;} 218 219 aij->svalues = svalues; aij->rvalues = rvalues; 220 aij->nsends = nsends; aij->nrecvs = nreceives; 221 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 222 aij->rmax = nmax; 223 224 return 0; 225 } 226 extern int MPIAIJSetUpMultiply(Mat); 227 228 static int MatiAIJEndAssemble(Mat mat) 229 { 230 int ierr; 231 Matimpiaij *aij = (Matimpiaij *) mat->data; 232 233 MPI_Status *send_status,recv_status; 234 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n; 235 int row,col; 236 Scalar *values,val; 237 InsertMode addv = aij->insertmode; 238 239 /* wait on receives */ 240 while (count) { 241 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 242 /* unpack receives into our local space */ 243 values = aij->rvalues + 3*imdex*aij->rmax; 244 MPI_Get_count(&recv_status,MPI_SCALAR,&n); 245 n = n/3; 246 for ( i=0; i<n; i++ ) { 247 row = (int) PETSCREAL(values[3*i]) - aij->rstart; 248 col = (int) PETSCREAL(values[3*i+1]); 249 val = values[3*i+2]; 250 if (col >= aij->cstart && col < aij->cend) { 251 col -= aij->cstart; 252 MatSetValues(aij->A,1,&row,1,&col,&val,addv); 253 } 254 else { 255 if (aij->assembled) { 256 if (!aij->colmap) {ierr = CreateColmap(mat); CHKERR(ierr);} 257 col = aij->colmap[col] - 1; 258 if (col < 0) { 259 SETERR(1,"Cannot insert new off diagonal block nonzero in\ 260 already\ 261 assembled matrix. 
Contact petsc-maint@mcs.anl.gov\ 262 if your need this feature"); 263 } 264 } 265 MatSetValues(aij->B,1,&row,1,&col,&val,addv); 266 } 267 } 268 count--; 269 } 270 FREE(aij->recv_waits); FREE(aij->rvalues); 271 272 /* wait on sends */ 273 if (aij->nsends) { 274 send_status = (MPI_Status *) MALLOC( aij->nsends*sizeof(MPI_Status) ); 275 CHKPTR(send_status); 276 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 277 FREE(send_status); 278 } 279 FREE(aij->send_waits); FREE(aij->svalues); 280 281 aij->insertmode = NotSetValues; 282 ierr = MatBeginAssembly(aij->A); CHKERR(ierr); 283 ierr = MatEndAssembly(aij->A); CHKERR(ierr); 284 285 if (!aij->assembled) { 286 ierr = MPIAIJSetUpMultiply(mat); CHKERR(ierr); 287 } 288 ierr = MatBeginAssembly(aij->B); CHKERR(ierr); 289 ierr = MatEndAssembly(aij->B); CHKERR(ierr); 290 291 aij->assembled = 1; 292 return 0; 293 } 294 295 static int MatiZero(Mat A) 296 { 297 Matimpiaij *l = (Matimpiaij *) A->data; 298 299 MatZeroEntries(l->A); MatZeroEntries(l->B); 300 return 0; 301 } 302 303 /* again this uses the same basic stratagy as in the assembly and 304 scatter create routines, we should try to do it systemamatically 305 if we can figure out the proper level of generality. */ 306 307 /* the code does not do the diagonal entries correctly unless the 308 matrix is square and the column and row owerships are identical. 309 This is a BUG. The only way to fix it seems to be to access 310 aij->A and aij->B directly and not through the MatZeroRows() 311 routine. 
312 */ 313 static int MatiZerorows(Mat A,IS is,Scalar *diag) 314 { 315 Matimpiaij *l = (Matimpiaij *) A->data; 316 int i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids; 317 int *procs,*nprocs,j,found,idx,nsends,*work; 318 int nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid; 319 int *rvalues,tag = 67,count,base,slen,n,*source; 320 int *lens,imdex,*lrows,*values; 321 MPI_Comm comm = A->comm; 322 MPI_Request *send_waits,*recv_waits; 323 MPI_Status recv_status,*send_status; 324 IS istmp; 325 326 if (!l->assembled) SETERR(1,"MatiZerorows: must assmble matrix first"); 327 ierr = ISGetLocalSize(is,&N); CHKERR(ierr); 328 ierr = ISGetIndices(is,&rows); CHKERR(ierr); 329 330 /* first count number of contributors to each processor */ 331 nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs); 332 MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids; 333 owner = (int *) MALLOC((N+1)*sizeof(int)); CHKPTR(owner); /* see note*/ 334 for ( i=0; i<N; i++ ) { 335 idx = rows[i]; 336 found = 0; 337 for ( j=0; j<numtids; j++ ) { 338 if (idx >= owners[j] && idx < owners[j+1]) { 339 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 340 } 341 } 342 if (!found) SETERR(1,"Imdex out of range"); 343 } 344 nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 345 346 /* inform other processors of number of messages and max length*/ 347 work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work); 348 MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm); 349 nrecvs = work[mytid]; 350 MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm); 351 nmax = work[mytid]; 352 FREE(work); 353 354 /* post receives: */ 355 rvalues = (int *) MALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 356 CHKPTR(rvalues); 357 recv_waits = (MPI_Request *) MALLOC((nrecvs+1)*sizeof(MPI_Request)); 358 CHKPTR(recv_waits); 359 for ( i=0; i<nrecvs; i++ ) { 360 MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag, 361 
comm,recv_waits+i); 362 } 363 364 /* do sends: 365 1) starts[i] gives the starting index in svalues for stuff going to 366 the ith processor 367 */ 368 svalues = (int *) MALLOC( (N+1)*sizeof(int) ); CHKPTR(svalues); 369 send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request)); 370 CHKPTR(send_waits); 371 starts = (int *) MALLOC( (numtids+1)*sizeof(int) ); CHKPTR(starts); 372 starts[0] = 0; 373 for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 374 for ( i=0; i<N; i++ ) { 375 svalues[starts[owner[i]]++] = rows[i]; 376 } 377 ISRestoreIndices(is,&rows); 378 379 starts[0] = 0; 380 for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 381 count = 0; 382 for ( i=0; i<numtids; i++ ) { 383 if (procs[i]) { 384 MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag, 385 comm,send_waits+count++); 386 } 387 } 388 FREE(starts); 389 390 base = owners[mytid]; 391 392 /* wait on receives */ 393 lens = (int *) MALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTR(lens); 394 source = lens + nrecvs; 395 count = nrecvs; slen = 0; 396 while (count) { 397 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 398 /* unpack receives into our local space */ 399 MPI_Get_count(&recv_status,MPI_INT,&n); 400 source[imdex] = recv_status.MPI_SOURCE; 401 lens[imdex] = n; 402 slen += n; 403 count--; 404 } 405 FREE(recv_waits); 406 407 /* move the data into the send scatter */ 408 lrows = (int *) MALLOC( slen*sizeof(int) ); CHKPTR(lrows); 409 count = 0; 410 for ( i=0; i<nrecvs; i++ ) { 411 values = rvalues + i*nmax; 412 for ( j=0; j<lens[i]; j++ ) { 413 lrows[count++] = values[j] - base; 414 } 415 } 416 FREE(rvalues); FREE(lens); 417 FREE(owner); FREE(nprocs); 418 419 /* actually zap the local rows */ 420 ierr = ISCreateSequential(slen,lrows,&istmp); CHKERR(ierr); FREE(lrows); 421 ierr = MatZeroRows(l->A,istmp,diag); CHKERR(ierr); 422 ierr = MatZeroRows(l->B,istmp,0); CHKERR(ierr); 423 ierr = ISDestroy(istmp); CHKERR(ierr); 424 425 /* wait on sends */ 426 
if (nsends) { 427 send_status = (MPI_Status *) MALLOC( nsends*sizeof(MPI_Status) ); 428 CHKPTR(send_status); 429 MPI_Waitall(nsends,send_waits,send_status); 430 FREE(send_status); 431 } 432 FREE(send_waits); FREE(svalues); 433 434 435 return 0; 436 } 437 438 static int MatiAIJMult(Mat aijin,Vec xx,Vec yy) 439 { 440 Matimpiaij *aij = (Matimpiaij *) aijin->data; 441 int ierr; 442 if (!aij->assembled) SETERR(1,"MatiAIJMul: must assmble matrix first"); 443 ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx); 444 CHKERR(ierr); 445 ierr = MatMult(aij->A,xx,yy); CHKERR(ierr); 446 ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx); 447 CHKERR(ierr); 448 ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERR(ierr); 449 return 0; 450 } 451 452 static int MatiAIJMultadd(Mat aijin,Vec xx,Vec yy,Vec zz) 453 { 454 Matimpiaij *aij = (Matimpiaij *) aijin->data; 455 int ierr; 456 if (!aij->assembled) SETERR(1,"MatiAIJMul: must assmble matrix first"); 457 ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx); 458 CHKERR(ierr); 459 ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERR(ierr); 460 ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx); 461 CHKERR(ierr); 462 ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERR(ierr); 463 return 0; 464 } 465 466 static int MatiAIJMultTrans(Mat aijin,Vec xx,Vec yy) 467 { 468 Matimpiaij *aij = (Matimpiaij *) aijin->data; 469 int ierr; 470 471 if (!aij->assembled) SETERR(1,"MatiAIJMulTrans: must assmble matrix first"); 472 /* do nondiagonal part */ 473 ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr); 474 /* send it on its way */ 475 ierr = VecScatterBegin(aij->lvec,0,yy,0,AddValues, 476 ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr); 477 /* do local part */ 478 ierr = MatMultTrans(aij->A,xx,yy); CHKERR(ierr); 479 /* receive remote parts: note this assumes the values are not actually */ 480 /* inserted in yy until the next line, which is true for my 
implementation*/ 481 /* but is not perhaps always true. */ 482 ierr = VecScatterEnd(aij->lvec,0,yy,0,AddValues,ScatterAll|ScatterReverse, 483 aij->Mvctx); CHKERR(ierr); 484 return 0; 485 } 486 487 static int MatiAIJMultTransadd(Mat aijin,Vec xx,Vec yy,Vec zz) 488 { 489 Matimpiaij *aij = (Matimpiaij *) aijin->data; 490 int ierr; 491 492 if (!aij->assembled) SETERR(1,"MatiAIJMulTrans: must assmble matrix first"); 493 /* do nondiagonal part */ 494 ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr); 495 /* send it on its way */ 496 ierr = VecScatterBegin(aij->lvec,0,zz,0,AddValues, 497 ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr); 498 /* do local part */ 499 ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERR(ierr); 500 /* receive remote parts: note this assumes the values are not actually */ 501 /* inserted in yy until the next line, which is true for my implementation*/ 502 /* but is not perhaps always true. */ 503 ierr = VecScatterEnd(aij->lvec,0,zz,0,AddValues,ScatterAll|ScatterReverse, 504 aij->Mvctx); CHKERR(ierr); 505 return 0; 506 } 507 508 /* 509 This only works correctly for square matrices where the subblock A->A is the 510 diagonal block 511 */ 512 static int MatiAIJgetdiag(Mat Ain,Vec v) 513 { 514 Matimpiaij *A = (Matimpiaij *) Ain->data; 515 if (!A->assembled) SETERR(1,"MatiAIJgetdiag: must assmble matrix first"); 516 return MatGetDiagonal(A->A,v); 517 } 518 519 static int MatiAIJdestroy(PetscObject obj) 520 { 521 Mat mat = (Mat) obj; 522 Matimpiaij *aij = (Matimpiaij *) mat->data; 523 int ierr; 524 FREE(aij->rowners); 525 ierr = MatDestroy(aij->A); CHKERR(ierr); 526 ierr = MatDestroy(aij->B); CHKERR(ierr); 527 if (aij->colmap) FREE(aij->colmap); 528 if (aij->garray) FREE(aij->garray); 529 if (aij->lvec) VecDestroy(aij->lvec); 530 if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx); 531 FREE(aij); PETSCHEADERDESTROY(mat); 532 return 0; 533 } 534 535 static int MatiView(PetscObject obj,Viewer viewer) 536 { 537 Mat mat = (Mat) obj; 538 Matimpiaij *aij = 
(Matimpiaij *) mat->data; 539 int ierr; 540 541 if (!aij->assembled) SETERR(1,"MatiAIJMulTrans: must assmble matrix first"); 542 MPE_Seq_begin(mat->comm,1); 543 printf("[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 544 aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 545 aij->cend); 546 ierr = MatView(aij->A,0); CHKERR(ierr); 547 ierr = MatView(aij->B,0); CHKERR(ierr); 548 fflush(stdout); 549 MPE_Seq_end(mat->comm,1); 550 return 0; 551 } 552 553 extern int MatiAIJmarkdiag(Matiaij *); 554 /* 555 This has to provide several versions. 556 557 1) per sequential 558 2) a) use only local smoothing updating outer values only once. 559 b) local smoothing updating outer values each inner iteration 560 3) color updating out values betwen colors. 561 */ 562 static int MatiAIJrelax(Mat matin,Vec bb,double omega,int flag,double shift, 563 int its,Vec xx) 564 { 565 Matimpiaij *mat = (Matimpiaij *) matin->data; 566 Mat AA = mat->A, BB = mat->B; 567 Matiaij *A = (Matiaij *) AA->data, *B = (Matiaij *)BB->data; 568 Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 569 int ierr,*idx, *diag; 570 int n = mat->n, m = mat->m, i; 571 Vec tt; 572 573 if (!mat->assembled) SETERR(1,"MatiAIJRelax: must assmble matrix first"); 574 575 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 576 xs = x -1; /* shift by one for index start of 1 */ 577 ls--; 578 if (!A->diag) {if ((ierr = MatiAIJmarkdiag(A))) return ierr;} 579 diag = A->diag; 580 if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 581 SETERR(1,"That option not yet support for parallel AIJ matrices"); 582 } 583 if (flag & SOR_EISENSTAT) { 584 /* Let A = L + U + D; where L is lower trianglar, 585 U is upper triangular, E is diagonal; This routine applies 586 587 (L + E)^{-1} A (U + E)^{-1} 588 589 to a vector efficiently using Eisenstat's trick. This is for 590 the case of SSOR preconditioner, so E is D/omega where omega 591 is the relaxation factor. 
*/
    ierr = VecCreate(xx,&tt); CHKERR(ierr);
    VecGetArray(tt,&t);
    scale = (2.0/omega) - 1.0;
    /* x = (E + U)^{-1} b  -- backward solve, pipelined upward */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                            mat->Mvctx); CHKERR(ierr);
    for ( i=m-1; i>-1; i-- ) {
      /* strictly upper-triangular part of row i of the diagonal block;
         A->i/A->j/diag hold 1-based CSR indices, hence the -1 offsets */
      n   = A->i[i+1] - diag[i] - 1;
      idx = A->j + diag[i];
      v   = A->a + diag[i];
      sum = b[i];
      SPARSEDENSEMDOT(sum,xs,v,idx,n);
      d   = shift + A->a[diag[i]-1];
      /* entire row i of the off-diagonal block, against the ghost values */
      n   = B->i[i+1] - B->i[i];
      idx = B->j + B->i[i] - 1;
      v   = B->a + B->i[i] - 1;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      x[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                          mat->Mvctx); CHKERR(ierr);

    /* t = b - (2*E - D)x */
    v = A->a;
    for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; }

    /* t = (E + L)^{-1}t  -- forward solve, pipelined downward */
    ts = t - 1; /* shifted by one for index start of a or mat->j*/
    diag = A->diag;  /* restore diag; the loop above advanced it */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(tt,0,mat->lvec,0,InsertValues,PipelineDown,
                            mat->Mvctx); CHKERR(ierr);
    for ( i=0; i<m; i++ ) {
      /* strictly lower-triangular part of row i */
      n   = diag[i] - A->i[i];
      idx = A->j + A->i[i] - 1;
      v   = A->a + A->i[i] - 1;
      sum = t[i];
      SPARSEDENSEMDOT(sum,ts,v,idx,n);
      d   = shift + A->a[diag[i]-1];
      n   = B->i[i+1] - B->i[i];
      idx = B->j + B->i[i] - 1;
      v   = B->a + B->i[i] - 1;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      t[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(tt,0,mat->lvec,0,InsertValues,PipelineDown,
                          mat->Mvctx); CHKERR(ierr);
    /* x = x + t */
    for ( i=0; i<m; i++ ) { x[i] += t[i]; }
    VecDestroy(tt);
    return 0;
  }

  if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec); VecSet(&zero,xx);
    }
    else {
      /* bring the ghost values of x up to date before the first sweep */
      ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
      CHKERR(ierr);
      ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
      CHKERR(ierr);
    }
    while (its--) {
      /* go down through the rows */
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=0; i<m; i++ ) {
        /* full row i; SPARSEDENSEMDOT presumably accumulates the
           residual contribution -v.x into sum (see inline/spops.h) */
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                            mat->Mvctx); CHKERR(ierr);
      /* come up through the rows */
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                            mat->Mvctx); CHKERR(ierr);
    }
  }
  else if (flag & SOR_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with x = 0 the first sweep only needs the lower triangle */
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=0; i<m; i++ ) {
        n   = diag[i] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                            mat->Mvctx); CHKERR(ierr);
      its--;
    }
    while (its--) {
      ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
      CHKERR(ierr);
      ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx);
      CHKERR(ierr);
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown,
                            mat->Mvctx); CHKERR(ierr);
    }
  }
  else if (flag & SOR_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with x = 0 the first sweep only needs the upper triangle */
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - diag[i] - 1;
        idx = A->j + diag[i];
        v   = A->a + diag[i];
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                            mat->Mvctx); CHKERR(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterDown,
                             mat->Mvctx); CHKERR(ierr);
      ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterDown,
                           mat->Mvctx); CHKERR(ierr);
      ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                              mat->Mvctx); CHKERR(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp,
                            mat->Mvctx); CHKERR(ierr);
    }
  }
  else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* ghost values are all zero, so delegate to the local submatrix */
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    /* ghost values are fetched once and held fixed for all iterations */
    ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
    CHKERR(ierr);
    ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
    CHKERR(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
    CHKERR(ierr);
    ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx);
    CHKERR(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,
                           mat->Mvctx); CHKERR(ierr);
    ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,
                         mat->Mvctx); CHKERR(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  return 0;
}

/* MatiAIJinsopt - Passes nonzero-location options through to both
   sequential submatrices; column orientation is not supported. */
static int MatiAIJinsopt(Mat aijin,int op)
{
  Matimpiaij *aij = (Matimpiaij *) aijin->data;

  if (op == NO_NEW_NONZERO_LOCATIONS) {
    MatSetOption(aij->A,op);
    MatSetOption(aij->B,op);
  }
  else if (op == YES_NEW_NONZERO_LOCATIONS) {
    MatSetOption(aij->A,op);
    MatSetOption(aij->B,op);
  }
  else if (op == COLUMN_ORIENTED) SETERR(1,"Column oriented not supported");
  return 0;
}

/* MatiAIJsize - Returns the global dimensions M,N. */
static int MatiAIJsize(Mat matin,int *m,int *n)
{
  Matimpiaij *mat = (Matimpiaij *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

/* MatiAIJlocalsize - Returns the local dimensions m,n. */
static int MatiAIJlocalsize(Mat matin,int *m,int *n)
{
  Matimpiaij *mat = (Matimpiaij *) matin->data;
  *m = mat->m; *n = mat->n;
  return 0;
}

/* MatiAIJrange - Returns the range [rstart,rend) of locally owned rows. */
static int MatiAIJrange(Mat matin,int *m,int *n)
{
  Matimpiaij *mat = (Matimpiaij *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

static int MatiCopy(Mat,Mat *);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatiAIJInsertValues,
       0, 0,
       MatiAIJMult,MatiAIJMultadd,MatiAIJMultTrans,MatiAIJMultTransadd,
       0,0,0,0,
       0,0,
       MatiAIJrelax,
       0,
       0,0,0,
       MatiCopy,
       MatiAIJgetdiag,0,0,
       MatiAIJBeginAssemble,MatiAIJEndAssemble,
       0,
       MatiAIJinsopt,MatiZero,MatiZerorows,0,
       0,0,0,0,
       MatiAIJsize,MatiAIJlocalsize,MatiAIJrange };

/*@

    MatCreateMPIAIJ - Creates a sparse parallel matrix
  in AIJ format.

  Input Parameters:
.   comm - MPI communicator
.   m,n - number of local rows and columns (or -1 to have calculated)
.   M,N - global rows and columns (or -1 to have calculated)
.   d_nz - total number nonzeros in diagonal portion of matrix
.   d_nnz - number of nonzeros per row in diagonal portion of matrix or null
. 
          You must leave room for the diagonal entry even if it is zero.
.   o_nz - total number nonzeros in off-diagonal portion of matrix
.   o_nnz - number of nonzeros per row in off-diagonal portion of matrix
.          or null. You must have at least one nonzero per row.

  Output parameters:
.  newmat - the matrix

  Keywords: matrix, aij, compressed row, sparse, parallel
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
{
  Mat        mat;
  Matimpiaij *aij;
  int        ierr, i,sum[2],work[2];

  *newmat = 0;
  PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATAIJMPI,comm);
  mat->data    = (void *) (aij = NEW(Matimpiaij)); CHKPTR(aij);
  mat->ops     = &MatOps;
  mat->destroy = MatiAIJdestroy;
  mat->view    = MatiView;
  mat->factor  = 0;
  mat->row     = 0;
  mat->col     = 0;

  mat->comm       = comm;
  aij->insertmode = NotSetValues;
  MPI_Comm_rank(comm,&aij->mytid);
  MPI_Comm_size(comm,&aij->numtids);

  /* fill in any dimensions the caller asked to have calculated */
  if (M == -1 || N == -1) {
    work[0] = m; work[1] = n;
    MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm );
    if (M == -1) M = sum[0];
    if (N == -1) N = sum[1];
  }
  /* split leftover rows/columns among the low-numbered processors */
  if (m == -1) {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);}
  if (n == -1) {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);}
  aij->m = m;
  aij->n = n;
  aij->N = N;
  aij->M = M;

  /* build local table of row and column ownerships; cowners shares the
     rowners allocation */
  aij->rowners = (int *) MALLOC(2*(aij->numtids+2)*sizeof(int));
  CHKPTR(aij->rowners);
  aij->cowners = aij->rowners + aij->numtids + 1;
  MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm);
  aij->rowners[0] = 0;
  /* prefix-sum the gathered local sizes into ownership offsets */
  for ( i=2; i<=aij->numtids; i++ ) {
    aij->rowners[i] += aij->rowners[i-1];
  }
  aij->rstart = aij->rowners[aij->mytid];
  aij->rend   = aij->rowners[aij->mytid+1];
  MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm);
  aij->cowners[0] = 0;
  for ( i=2; i<=aij->numtids; i++ ) {
    aij->cowners[i] += aij->cowners[i-1];
  }
  aij->cstart = aij->cowners[aij->mytid];
  aij->cend   = aij->cowners[aij->mytid+1];

  /* the off-diagonal block is created with the full global width N;
     presumably it is compacted during the first assembly (see
     CreateColmap / MPIAIJSetUpMultiply) -- confirm */
  ierr = MatCreateSequentialAIJ(m,n,d_nz,d_nnz,&aij->A); CHKERR(ierr);
  ierr = MatCreateSequentialAIJ(m,N,o_nz,o_nnz,&aij->B); CHKERR(ierr);

  /* build cache for off array entries formed */
  aij->stash.nmax  = CHUNCKSIZE; /* completely arbitrary number */
  aij->stash.n     = 0;
  /* single allocation: Scalars first, then the two index arrays */
  aij->stash.array = (Scalar *) MALLOC( aij->stash.nmax*(2*sizeof(int) +
                                sizeof(Scalar))); CHKPTR(aij->stash.array);
  aij->stash.idx = (int *) (aij->stash.array + aij->stash.nmax);
  aij->stash.idy = (int *) (aij->stash.idx + aij->stash.nmax);
  aij->colmap    = 0;
  aij->garray    = 0;

  /* stuff used for matrix vector multiply */
  aij->lvec      = 0;
  aij->Mvctx     = 0;
  aij->assembled = 0;

  *newmat = mat;
  return 0;
}

/* MatiCopy - Duplicates an assembled parallel AIJ matrix, copying both
   sequential submatrices, the ghost vector, and the scatter context. */
static int MatiCopy(Mat matin,Mat *newmat)
{
  Mat        mat;
  Matimpiaij *aij,*oldmat = (Matimpiaij *) matin->data;
  int        ierr;

  *newmat = 0;
  if (!oldmat->assembled) SETERR(1,"Cannot copy unassembled matrix");
  PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATAIJMPI,matin->comm);
  mat->data    = (void *) (aij = NEW(Matimpiaij)); CHKPTR(aij);
  mat->ops     = &MatOps;
  mat->destroy = MatiAIJdestroy;
  mat->view    = MatiView;
  mat->factor  = matin->factor;
  mat->row     = 0;
  mat->col     = 0;

  aij->m = oldmat->m;
  aij->n = oldmat->n;
  aij->M = oldmat->M;
  aij->N = oldmat->N;

  aij->assembled  = 1;
  aij->rstart     = oldmat->rstart;
  aij->rend       = oldmat->rend;
  aij->cstart     = oldmat->cstart;
  aij->cend       = oldmat->cend;
  aij->numtids    = oldmat->numtids;
  aij->mytid      = oldmat->mytid;
  aij->insertmode = NotSetValues;

  /* NOTE(review): only the row ownership table is copied; aij->cowners is
     left unset and this allocation is smaller than the 2*(numtids+2) block
     made in MatCreateMPIAIJ.  cowners appears to be used only during
     creation, but confirm nothing dereferences it on a copied matrix. */
  aij->rowners = (int *) MALLOC( (aij->numtids+1)*sizeof(int) );
  CHKPTR(aij->rowners);
  MEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int));
  /* the copy starts with an empty stash; it will be allocated on demand */
  aij->stash.nmax  = 0;
  aij->stash.n     = 0;
  aij->stash.array = 0;
  aij->colmap      = 0;
  aij->garray      = 0;
  mat->comm        = matin->comm;

  ierr = VecCreate(oldmat->lvec,&aij->lvec); CHKERR(ierr);
  ierr = VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERR(ierr);
  ierr = MatCopy(oldmat->A,&aij->A); CHKERR(ierr);
  ierr = MatCopy(oldmat->B,&aij->B); CHKERR(ierr);
  *newmat = mat;
  return 0;
}