1 #ifndef lint 2 static char vcid[] = "$Id: mpiaij.c,v 1.16 1995/03/25 01:10:01 curfman Exp bsmith $"; 3 #endif 4 5 #include "mpiaij.h" 6 #include "vec/vecimpl.h" 7 #include "inline/spops.h" 8 9 #define CHUNCKSIZE 100 10 /* 11 This is a simple minded stash. Do a linear search to determine if 12 in stash, if not add to end. 13 */ 14 static int StashValues(Stash *stash,int row,int n, int *idxn, 15 Scalar *values,InsertMode addv) 16 { 17 int i,j,N = stash->n,found,*n_idx, *n_idy; 18 Scalar val,*n_array; 19 20 for ( i=0; i<n; i++ ) { 21 found = 0; 22 val = *values++; 23 for ( j=0; j<N; j++ ) { 24 if ( stash->idx[j] == row && stash->idy[j] == idxn[i]) { 25 /* found a match */ 26 if (addv == AddValues) stash->array[j] += val; 27 else stash->array[j] = val; 28 found = 1; 29 break; 30 } 31 } 32 if (!found) { /* not found so add to end */ 33 if ( stash->n == stash->nmax ) { 34 /* allocate a larger stash */ 35 n_array = (Scalar *) MALLOC( (stash->nmax + CHUNCKSIZE)*( 36 2*sizeof(int) + sizeof(Scalar))); 37 CHKPTR(n_array); 38 n_idx = (int *) (n_array + stash->nmax + CHUNCKSIZE); 39 n_idy = (int *) (n_idx + stash->nmax + CHUNCKSIZE); 40 MEMCPY(n_array,stash->array,stash->nmax*sizeof(Scalar)); 41 MEMCPY(n_idx,stash->idx,stash->nmax*sizeof(int)); 42 MEMCPY(n_idy,stash->idy,stash->nmax*sizeof(int)); 43 if (stash->array) FREE(stash->array); 44 stash->array = n_array; stash->idx = n_idx; stash->idy = n_idy; 45 stash->nmax += CHUNCKSIZE; 46 } 47 stash->array[stash->n] = val; 48 stash->idx[stash->n] = row; 49 stash->idy[stash->n++] = idxn[i]; 50 } 51 } 52 return 0; 53 } 54 55 /* local utility routine that creates a mapping from the global column 56 number to the local number in the off-diagonal part of the local 57 storage of the matrix. This is done in a non scable way since the 58 length of colmap equals the global matrix length. 59 */ 60 static int CreateColmap(Mat mat) 61 { 62 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 63 Mat_AIJ *B = (Mat_AIJ*) aij->B->data; 64 int n = B->n,i; 65 aij->colmap = (int *) MALLOC( aij->N*sizeof(int) ); CHKPTR(aij->colmap); 66 aij->invcolmap = (int *) MALLOC( aij->N*sizeof(int) ); CHKPTR(aij->invcolmap); 67 MEMSET(aij->colmap,0,aij->N*sizeof(int)); 68 MEMSET(aij->invcolmap,0,aij->N*sizeof(int)); 69 for ( i=0; i<n; i++ ) { 70 aij->colmap[aij->garray[i]] = i+1; 71 aij->invcolmap[i+1] = aij->garray[i]; 72 } 73 return 0; 74 } 75 76 static int MatInsertValues_MPIAIJ(Mat mat,int m,int *idxm,int n, 77 int *idxn,Scalar *v,InsertMode addv) 78 { 79 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 80 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 81 int cstart = aij->cstart, cend = aij->cend,row,col; 82 83 if (aij->insertmode != NotSetValues && aij->insertmode != addv) { 84 SETERR(1,"You cannot mix inserts and adds"); 85 } 86 aij->insertmode = addv; 87 for ( i=0; i<m; i++ ) { 88 if (idxm[i] < 0) SETERR(1,"Negative row index"); 89 if (idxm[i] >= aij->M) SETERR(1,"Row index too large"); 90 if (idxm[i] >= rstart && idxm[i] < rend) { 91 row = idxm[i] - rstart; 92 for ( j=0; j<n; j++ ) { 93 if (idxn[j] < 0) SETERR(1,"Negative column index"); 94 if (idxn[j] >= aij->N) SETERR(1,"Column index too large"); 95 if (idxn[j] >= cstart && idxn[j] < cend){ 96 col = idxn[j] - cstart; 97 ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr); 98 } 99 else { 100 if (aij->assembled) { 101 if (!aij->colmap) {ierr = CreateColmap(mat); CHKERR(ierr);} 102 col = aij->colmap[idxn[j]] - 1; 103 if (col < 0) { 104 SETERR(1,"Cannot insert new off diagonal block nonzero in\ 105 already\ 106 assembled matrix. Contact petsc-maint@mcs.anl.gov\ 107 if your need this feature"); 108 } 109 } 110 else col = idxn[j]; 111 ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERR(ierr); 112 } 113 } 114 } 115 else { 116 ierr = StashValues(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERR(ierr); 117 } 118 } 119 return 0; 120 } 121 122 /* 123 the assembly code is alot like the code for vectors, we should 124 sometime derive a single assembly code that can be used for 125 either case. 126 */ 127 128 static int MatBeginAssemble_MPIAIJ(Mat mat) 129 { 130 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 131 MPI_Comm comm = mat->comm; 132 int numtids = aij->numtids, *owners = aij->rowners; 133 int mytid = aij->mytid; 134 MPI_Request *send_waits,*recv_waits; 135 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 136 int tag = 50, *owner,*starts,count; 137 InsertMode addv; 138 Scalar *rvalues,*svalues; 139 140 /* make sure all processors are either in INSERTMODE or ADDMODE */ 141 MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT, 142 MPI_BOR,comm); 143 if (addv == (AddValues|InsertValues)) { 144 SETERR(1,"Some processors have inserted while others have added"); 145 } 146 aij->insertmode = addv; /* in case this processor had no cache */ 147 148 /* first count number of contributors to each processor */ 149 nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs); 150 MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids; 151 owner = (int *) MALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTR(owner); 152 for ( i=0; i<aij->stash.n; i++ ) { 153 idx = aij->stash.idx[i]; 154 for ( j=0; j<numtids; j++ ) { 155 if (idx >= owners[j] && idx < owners[j+1]) { 156 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 157 } 158 } 159 } 160 nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 161 162 /* inform other processors of number of messages and max length*/ 163 work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work); 164 MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm); 165 nreceives = work[mytid]; 166 MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm); 167 nmax = work[mytid]; 168 FREE(work); 169 170 /* post receives: 171 1) each message will consist of ordered pairs 172 (global index,value) we store the global index as a double 173 to simplify the message passing. 174 2) since we don't know how long each individual message is we 175 allocate the largest needed buffer for each receive. Potentially 176 this is a lot of wasted space. 177 178 179 This could be done better. 180 */ 181 rvalues = (Scalar *) MALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 182 CHKPTR(rvalues); 183 recv_waits = (MPI_Request *) MALLOC((nreceives+1)*sizeof(MPI_Request)); 184 CHKPTR(recv_waits); 185 for ( i=0; i<nreceives; i++ ) { 186 MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPI_SCALAR,MPI_ANY_SOURCE,tag, 187 comm,recv_waits+i); 188 } 189 190 /* do sends: 191 1) starts[i] gives the starting index in svalues for stuff going to 192 the ith processor 193 */ 194 svalues = (Scalar *) MALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) ); 195 CHKPTR(svalues); 196 send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request)); 197 CHKPTR(send_waits); 198 starts = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(starts); 199 starts[0] = 0; 200 for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 201 for ( i=0; i<aij->stash.n; i++ ) { 202 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 203 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 204 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 205 } 206 FREE(owner); 207 starts[0] = 0; 208 for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 209 count = 0; 210 for ( i=0; i<numtids; i++ ) { 211 if (procs[i]) { 212 MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPI_SCALAR,i,tag, 213 comm,send_waits+count++); 214 } 215 } 216 FREE(starts); FREE(nprocs); 217 218 /* Free cache space */ 219 aij->stash.nmax = aij->stash.n = 0; 220 if (aij->stash.array){ FREE(aij->stash.array); aij->stash.array = 0;} 221 222 aij->svalues = svalues; aij->rvalues = rvalues; 223 aij->nsends = nsends; aij->nrecvs = nreceives; 224 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 225 aij->rmax = nmax; 226 227 return 0; 228 } 229 extern int MatSetUpMultiply_MPIAIJ(Mat); 230 231 static int MatEndAssemble_MPIAIJ(Mat mat) 232 { 233 int ierr; 234 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 235 236 MPI_Status *send_status,recv_status; 237 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n; 238 int row,col; 239 Scalar *values,val; 240 InsertMode addv = aij->insertmode; 241 242 /* wait on receives */ 243 while (count) { 244 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 245 /* unpack receives into our local space */ 246 values = aij->rvalues + 3*imdex*aij->rmax; 247 MPI_Get_count(&recv_status,MPI_SCALAR,&n); 248 n = n/3; 249 for ( i=0; i<n; i++ ) { 250 row = (int) PETSCREAL(values[3*i]) - aij->rstart; 251 col = (int) PETSCREAL(values[3*i+1]); 252 val = values[3*i+2]; 253 if (col >= aij->cstart && col < aij->cend) { 254 col -= aij->cstart; 255 MatSetValues(aij->A,1,&row,1,&col,&val,addv); 256 } 257 else { 258 if (aij->assembled) { 259 if (!aij->colmap) {ierr = CreateColmap(mat); CHKERR(ierr);} 260 col = aij->colmap[col] - 1; 261 if (col < 0) { 262 SETERR(1,"Cannot insert new off diagonal block nonzero in\ 263 already\ 264 assembled matrix. Contact petsc-maint@mcs.anl.gov\ 265 if your need this feature"); 266 } 267 } 268 MatSetValues(aij->B,1,&row,1,&col,&val,addv); 269 } 270 } 271 count--; 272 } 273 FREE(aij->recv_waits); FREE(aij->rvalues); 274 275 /* wait on sends */ 276 if (aij->nsends) { 277 send_status = (MPI_Status *) MALLOC( aij->nsends*sizeof(MPI_Status) ); 278 CHKPTR(send_status); 279 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 280 FREE(send_status); 281 } 282 FREE(aij->send_waits); FREE(aij->svalues); 283 284 aij->insertmode = NotSetValues; 285 ierr = MatBeginAssembly(aij->A); CHKERR(ierr); 286 ierr = MatEndAssembly(aij->A); CHKERR(ierr); 287 288 if (!aij->assembled) { 289 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERR(ierr); 290 } 291 ierr = MatBeginAssembly(aij->B); CHKERR(ierr); 292 ierr = MatEndAssembly(aij->B); CHKERR(ierr); 293 294 aij->assembled = 1; 295 return 0; 296 } 297 298 static int MatZero_MPIAIJ(Mat A) 299 { 300 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 301 302 MatZeroEntries(l->A); MatZeroEntries(l->B); 303 return 0; 304 } 305 306 /* again this uses the same basic stratagy as in the assembly and 307 scatter create routines, we should try to do it systemamatically 308 if we can figure out the proper level of generality. */ 309 310 /* the code does not do the diagonal entries correctly unless the 311 matrix is square and the column and row owerships are identical. 312 This is a BUG. The only way to fix it seems to be to access 313 aij->A and aij->B directly and not through the MatZeroRows() 314 routine. 315 */ 316 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 317 { 318 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 319 int i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids; 320 int *procs,*nprocs,j,found,idx,nsends,*work; 321 int nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid; 322 int *rvalues,tag = 67,count,base,slen,n,*source; 323 int *lens,imdex,*lrows,*values; 324 MPI_Comm comm = A->comm; 325 MPI_Request *send_waits,*recv_waits; 326 MPI_Status recv_status,*send_status; 327 IS istmp; 328 329 if (!l->assembled) SETERR(1,"MatZeroRows_MPIAIJ: must assemble matrix first"); 330 ierr = ISGetLocalSize(is,&N); CHKERR(ierr); 331 ierr = ISGetIndices(is,&rows); CHKERR(ierr); 332 333 /* first count number of contributors to each processor */ 334 nprocs = (int *) MALLOC( 2*numtids*sizeof(int) ); CHKPTR(nprocs); 335 MEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids; 336 owner = (int *) MALLOC((N+1)*sizeof(int)); CHKPTR(owner); /* see note*/ 337 for ( i=0; i<N; i++ ) { 338 idx = rows[i]; 339 found = 0; 340 for ( j=0; j<numtids; j++ ) { 341 if (idx >= owners[j] && idx < owners[j+1]) { 342 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 343 } 344 } 345 if (!found) SETERR(1,"Imdex out of range"); 346 } 347 nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 348 349 /* inform other processors of number of messages and max length*/ 350 work = (int *) MALLOC( numtids*sizeof(int) ); CHKPTR(work); 351 MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm); 352 nrecvs = work[mytid]; 353 MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm); 354 nmax = work[mytid]; 355 FREE(work); 356 357 /* post receives: */ 358 rvalues = (int *) MALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 359 CHKPTR(rvalues); 360 recv_waits = (MPI_Request *) MALLOC((nrecvs+1)*sizeof(MPI_Request)); 361 CHKPTR(recv_waits); 362 for ( i=0; i<nrecvs; i++ ) { 363 MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag, 364 comm,recv_waits+i); 365 } 366 367 /* do sends: 368 1) starts[i] gives the starting index in svalues for stuff going to 369 the ith processor 370 */ 371 svalues = (int *) MALLOC( (N+1)*sizeof(int) ); CHKPTR(svalues); 372 send_waits = (MPI_Request *) MALLOC( (nsends+1)*sizeof(MPI_Request)); 373 CHKPTR(send_waits); 374 starts = (int *) MALLOC( (numtids+1)*sizeof(int) ); CHKPTR(starts); 375 starts[0] = 0; 376 for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 377 for ( i=0; i<N; i++ ) { 378 svalues[starts[owner[i]]++] = rows[i]; 379 } 380 ISRestoreIndices(is,&rows); 381 382 starts[0] = 0; 383 for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 384 count = 0; 385 for ( i=0; i<numtids; i++ ) { 386 if (procs[i]) { 387 MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag, 388 comm,send_waits+count++); 389 } 390 } 391 FREE(starts); 392 393 base = owners[mytid]; 394 395 /* wait on receives */ 396 lens = (int *) MALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTR(lens); 397 source = lens + nrecvs; 398 count = nrecvs; slen = 0; 399 while (count) { 400 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 401 /* unpack receives into our local space */ 402 MPI_Get_count(&recv_status,MPI_INT,&n); 403 source[imdex] = recv_status.MPI_SOURCE; 404 lens[imdex] = n; 405 slen += n; 406 count--; 407 } 408 FREE(recv_waits); 409 410 /* move the data into the send scatter */ 411 lrows = (int *) MALLOC( slen*sizeof(int) ); CHKPTR(lrows); 412 count = 0; 413 for ( i=0; i<nrecvs; i++ ) { 414 values = rvalues + i*nmax; 415 for ( j=0; j<lens[i]; j++ ) { 416 lrows[count++] = values[j] - base; 417 } 418 } 419 FREE(rvalues); FREE(lens); 420 FREE(owner); FREE(nprocs); 421 422 /* actually zap the local rows */ 423 ierr = ISCreateSequential(slen,lrows,&istmp); CHKERR(ierr); FREE(lrows); 424 ierr = MatZeroRows(l->A,istmp,diag); CHKERR(ierr); 425 ierr = MatZeroRows(l->B,istmp,0); CHKERR(ierr); 426 ierr = ISDestroy(istmp); CHKERR(ierr); 427 428 /* wait on sends */ 429 if (nsends) { 430 send_status = (MPI_Status *) MALLOC( nsends*sizeof(MPI_Status) ); 431 CHKPTR(send_status); 432 MPI_Waitall(nsends,send_waits,send_status); 433 FREE(send_status); 434 } 435 FREE(send_waits); FREE(svalues); 436 437 438 return 0; 439 } 440 441 static int MatMult_MPIAIJ(Mat aijin,Vec xx,Vec yy) 442 { 443 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 444 int ierr; 445 if (!aij->assembled) SETERR(1,"MatMult_MPIAIJ: must assemble matrix first"); 446 ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx); 447 CHKERR(ierr); 448 ierr = MatMult(aij->A,xx,yy); CHKERR(ierr); 449 ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx); 450 CHKERR(ierr); 451 ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERR(ierr); 452 return 0; 453 } 454 455 static int MatMultAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz) 456 { 457 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 458 int ierr; 459 if (!aij->assembled) SETERR(1,"MatMult_MPIAIJ: must assemble matrix first"); 460 ierr = VecScatterBegin(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx); 461 CHKERR(ierr); 462 ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERR(ierr); 463 ierr = VecScatterEnd(xx,0,aij->lvec,0,InsertValues,ScatterAll,aij->Mvctx); 464 CHKERR(ierr); 465 ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERR(ierr); 466 return 0; 467 } 468 469 static int MatMultTrans_MPIAIJ(Mat aijin,Vec xx,Vec yy) 470 { 471 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 472 int ierr; 473 474 if (!aij->assembled) 475 SETERR(1,"MatMulTrans_MPIAIJ: must assemble matrix first"); 476 /* do nondiagonal part */ 477 ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr); 478 /* send it on its way */ 479 ierr = VecScatterBegin(aij->lvec,0,yy,0,AddValues, 480 ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr); 481 /* do local part */ 482 ierr = MatMultTrans(aij->A,xx,yy); CHKERR(ierr); 483 /* receive remote parts: note this assumes the values are not actually */ 484 /* inserted in yy until the next line, which is true for my implementation*/ 485 /* but is not perhaps always true. */ 486 ierr = VecScatterEnd(aij->lvec,0,yy,0,AddValues,ScatterAll|ScatterReverse, 487 aij->Mvctx); CHKERR(ierr); 488 return 0; 489 } 490 491 static int MatMultTransAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz) 492 { 493 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 494 int ierr; 495 496 if (!aij->assembled) 497 SETERR(1,"MatMulTransAdd_MPIAIJ: must assemble matrix first"); 498 /* do nondiagonal part */ 499 ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERR(ierr); 500 /* send it on its way */ 501 ierr = VecScatterBegin(aij->lvec,0,zz,0,AddValues, 502 ScatterAll|ScatterReverse,aij->Mvctx); CHKERR(ierr); 503 /* do local part */ 504 ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERR(ierr); 505 /* receive remote parts: note this assumes the values are not actually */ 506 /* inserted in yy until the next line, which is true for my implementation*/ 507 /* but is not perhaps always true. */ 508 ierr = VecScatterEnd(aij->lvec,0,zz,0,AddValues,ScatterAll|ScatterReverse, 509 aij->Mvctx); CHKERR(ierr); 510 return 0; 511 } 512 513 /* 514 This only works correctly for square matrices where the subblock A->A is the 515 diagonal block 516 */ 517 static int MatGetDiag_MPIAIJ(Mat Ain,Vec v) 518 { 519 Mat_MPIAIJ *A = (Mat_MPIAIJ *) Ain->data; 520 if (!A->assembled) SETERR(1,"MatGetDiag_MPIAIJ: must assemble matrix first"); 521 return MatGetDiagonal(A->A,v); 522 } 523 524 static int MatDestroy_MPIAIJ(PetscObject obj) 525 { 526 Mat mat = (Mat) obj; 527 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 528 int ierr; 529 #if defined(PETSC_LOG) 530 PLogObjectState(obj,"Rows %d Cols %d",aij->M,aij->N); 531 #endif 532 FREE(aij->rowners); 533 ierr = MatDestroy(aij->A); CHKERR(ierr); 534 ierr = MatDestroy(aij->B); CHKERR(ierr); 535 if (aij->colmap) FREE(aij->colmap); 536 if (aij->garray) FREE(aij->garray); 537 if (aij->lvec) VecDestroy(aij->lvec); 538 if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx); 539 FREE(aij); 540 PLogObjectDestroy(mat); 541 PETSCHEADERDESTROY(mat); 542 return 0; 543 } 544 545 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 546 { 547 Mat mat = (Mat) obj; 548 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 549 int ierr; 550 PetscObject vobj = (PetscObject) viewer; 551 552 if (!aij->assembled) SETERR(1,"MatView_MPIAIJ: must assemble matrix first"); 553 if (vobj->cookie == VIEWER_COOKIE) { 554 FILE *fd = ViewerFileGetPointer(viewer); 555 if (vobj->type == FILE_VIEWER) { 556 MPE_Seq_begin(mat->comm,1); 557 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 558 aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 559 aij->cend); 560 ierr = MatView(aij->A,viewer); CHKERR(ierr); 561 ierr = MatView(aij->B,viewer); CHKERR(ierr); 562 fflush(fd); 563 MPE_Seq_end(mat->comm,1); 564 } 565 } 566 return 0; 567 } 568 569 extern int MatMarkDiag_AIJ(Mat_AIJ *); 570 /* 571 This has to provide several versions. 572 573 1) per sequential 574 2) a) use only local smoothing updating outer values only once. 575 b) local smoothing updating outer values each inner iteration 576 3) color updating out values betwen colors. 577 */ 578 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,int flag,double shift, 579 int its,Vec xx) 580 { 581 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 582 Mat AA = mat->A, BB = mat->B; 583 Mat_AIJ *A = (Mat_AIJ *) AA->data, *B = (Mat_AIJ *)BB->data; 584 Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 585 int ierr,*idx, *diag; 586 int n = mat->n, m = mat->m, i; 587 Vec tt; 588 589 if (!mat->assembled) SETERR(1,"MatRelax_MPIAIJ: must assemble matrix first"); 590 591 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 592 xs = x -1; /* shift by one for index start of 1 */ 593 ls--; 594 if (!A->diag) {if ((ierr = MatMarkDiag_AIJ(A))) return ierr;} 595 diag = A->diag; 596 if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 597 SETERR(1,"That option not yet support for parallel AIJ matrices"); 598 } 599 if (flag & SOR_EISENSTAT) { 600 /* Let A = L + U + D; where L is lower trianglar, 601 U is upper triangular, E is diagonal; This routine applies 602 603 (L + E)^{-1} A (U + E)^{-1} 604 605 to a vector efficiently using Eisenstat's trick. This is for 606 the case of SSOR preconditioner, so E is D/omega where omega 607 is the relaxation factor. 608 */ 609 ierr = VecCreate(xx,&tt); CHKERR(ierr); 610 VecGetArray(tt,&t); 611 scale = (2.0/omega) - 1.0; 612 /* x = (E + U)^{-1} b */ 613 VecSet(&zero,mat->lvec); 614 ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp, 615 mat->Mvctx); CHKERR(ierr); 616 for ( i=m-1; i>-1; i-- ) { 617 n = A->i[i+1] - diag[i] - 1; 618 idx = A->j + diag[i]; 619 v = A->a + diag[i]; 620 sum = b[i]; 621 SPARSEDENSEMDOT(sum,xs,v,idx,n); 622 d = shift + A->a[diag[i]-1]; 623 n = B->i[i+1] - B->i[i]; 624 idx = B->j + B->i[i] - 1; 625 v = B->a + B->i[i] - 1; 626 SPARSEDENSEMDOT(sum,ls,v,idx,n); 627 x[i] = omega*(sum/d); 628 } 629 ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp, 630 mat->Mvctx); CHKERR(ierr); 631 632 /* t = b - (2*E - D)x */ 633 v = A->a; 634 for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; } 635 636 /* t = (E + L)^{-1}t */ 637 ts = t - 1; /* shifted by one for index start of a or mat->j*/ 638 diag = A->diag; 639 VecSet(&zero,mat->lvec); 640 ierr = VecPipelineBegin(tt,0,mat->lvec,0,InsertValues,PipelineDown, 641 mat->Mvctx); CHKERR(ierr); 642 for ( i=0; i<m; i++ ) { 643 n = diag[i] - A->i[i]; 644 idx = A->j + A->i[i] - 1; 645 v = A->a + A->i[i] - 1; 646 sum = t[i]; 647 SPARSEDENSEMDOT(sum,ts,v,idx,n); 648 d = shift + A->a[diag[i]-1]; 649 n = B->i[i+1] - B->i[i]; 650 idx = B->j + B->i[i] - 1; 651 v = B->a + B->i[i] - 1; 652 SPARSEDENSEMDOT(sum,ls,v,idx,n); 653 t[i] = omega*(sum/d); 654 } 655 ierr = VecPipelineEnd(tt,0,mat->lvec,0,InsertValues,PipelineDown, 656 mat->Mvctx); CHKERR(ierr); 657 /* x = x + t */ 658 for ( i=0; i<m; i++ ) { x[i] += t[i]; } 659 VecDestroy(tt); 660 return 0; 661 } 662 663 664 if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 665 if (flag & SOR_ZERO_INITIAL_GUESS) { 666 VecSet(&zero,mat->lvec); VecSet(&zero,xx); 667 } 668 else { 669 ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx); 670 CHKERR(ierr); 671 ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx); 672 CHKERR(ierr); 673 } 674 while (its--) { 675 /* go down through the rows */ 676 ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown, 677 mat->Mvctx); CHKERR(ierr); 678 for ( i=0; i<m; i++ ) { 679 n = A->i[i+1] - A->i[i]; 680 idx = A->j + A->i[i] - 1; 681 v = A->a + A->i[i] - 1; 682 sum = b[i]; 683 SPARSEDENSEMDOT(sum,xs,v,idx,n); 684 d = shift + A->a[diag[i]-1]; 685 n = B->i[i+1] - B->i[i]; 686 idx = B->j + B->i[i] - 1; 687 v = B->a + B->i[i] - 1; 688 SPARSEDENSEMDOT(sum,ls,v,idx,n); 689 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 690 } 691 ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown, 692 mat->Mvctx); CHKERR(ierr); 693 /* come up through the rows */ 694 ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp, 695 mat->Mvctx); CHKERR(ierr); 696 for ( i=m-1; i>-1; i-- ) { 697 n = A->i[i+1] - A->i[i]; 698 idx = A->j + A->i[i] - 1; 699 v = A->a + A->i[i] - 1; 700 sum = b[i]; 701 SPARSEDENSEMDOT(sum,xs,v,idx,n); 702 d = shift + A->a[diag[i]-1]; 703 n = B->i[i+1] - B->i[i]; 704 idx = B->j + B->i[i] - 1; 705 v = B->a + B->i[i] - 1; 706 SPARSEDENSEMDOT(sum,ls,v,idx,n); 707 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 708 } 709 ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp, 710 mat->Mvctx); CHKERR(ierr); 711 } 712 } 713 else if (flag & SOR_FORWARD_SWEEP){ 714 if (flag & SOR_ZERO_INITIAL_GUESS) { 715 VecSet(&zero,mat->lvec); 716 ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown, 717 mat->Mvctx); CHKERR(ierr); 718 for ( i=0; i<m; i++ ) { 719 n = diag[i] - A->i[i]; 720 idx = A->j + A->i[i] - 1; 721 v = A->a + A->i[i] - 1; 722 sum = b[i]; 723 SPARSEDENSEMDOT(sum,xs,v,idx,n); 724 d = shift + A->a[diag[i]-1]; 725 n = B->i[i+1] - B->i[i]; 726 idx = B->j + B->i[i] - 1; 727 v = B->a + B->i[i] - 1; 728 SPARSEDENSEMDOT(sum,ls,v,idx,n); 729 x[i] = omega*(sum/d); 730 } 731 ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown, 732 mat->Mvctx); CHKERR(ierr); 733 its--; 734 } 735 while (its--) { 736 ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx); 737 CHKERR(ierr); 738 ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterUp,mat->Mvctx); 739 CHKERR(ierr); 740 ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineDown, 741 mat->Mvctx); CHKERR(ierr); 742 for ( i=0; i<m; i++ ) { 743 n = A->i[i+1] - A->i[i]; 744 idx = A->j + A->i[i] - 1; 745 v = A->a + A->i[i] - 1; 746 sum = b[i]; 747 SPARSEDENSEMDOT(sum,xs,v,idx,n); 748 d = shift + A->a[diag[i]-1]; 749 n = B->i[i+1] - B->i[i]; 750 idx = B->j + B->i[i] - 1; 751 v = B->a + B->i[i] - 1; 752 SPARSEDENSEMDOT(sum,ls,v,idx,n); 753 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 754 } 755 ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineDown, 756 mat->Mvctx); CHKERR(ierr); 757 } 758 } 759 else if (flag & SOR_BACKWARD_SWEEP){ 760 if (flag & SOR_ZERO_INITIAL_GUESS) { 761 VecSet(&zero,mat->lvec); 762 ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp, 763 mat->Mvctx); CHKERR(ierr); 764 for ( i=m-1; i>-1; i-- ) { 765 n = A->i[i+1] - diag[i] - 1; 766 idx = A->j + diag[i]; 767 v = A->a + diag[i]; 768 sum = b[i]; 769 SPARSEDENSEMDOT(sum,xs,v,idx,n); 770 d = shift + A->a[diag[i]-1]; 771 n = B->i[i+1] - B->i[i]; 772 idx = B->j + B->i[i] - 1; 773 v = B->a + B->i[i] - 1; 774 SPARSEDENSEMDOT(sum,ls,v,idx,n); 775 x[i] = omega*(sum/d); 776 } 777 ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp, 778 mat->Mvctx); CHKERR(ierr); 779 its--; 780 } 781 while (its--) { 782 ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterDown, 783 mat->Mvctx); CHKERR(ierr); 784 ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterDown, 785 mat->Mvctx); CHKERR(ierr); 786 ierr = VecPipelineBegin(xx,0,mat->lvec,0,InsertValues,PipelineUp, 787 mat->Mvctx); CHKERR(ierr); 788 for ( i=m-1; i>-1; i-- ) { 789 n = A->i[i+1] - A->i[i]; 790 idx = A->j + A->i[i] - 1; 791 v = A->a + A->i[i] - 1; 792 sum = b[i]; 793 SPARSEDENSEMDOT(sum,xs,v,idx,n); 794 d = shift + A->a[diag[i]-1]; 795 n = B->i[i+1] - B->i[i]; 796 idx = B->j + B->i[i] - 1; 797 v = B->a + B->i[i] - 1; 798 SPARSEDENSEMDOT(sum,ls,v,idx,n); 799 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 800 } 801 ierr = VecPipelineEnd(xx,0,mat->lvec,0,InsertValues,PipelineUp, 802 mat->Mvctx); CHKERR(ierr); 803 } 804 } 805 else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 806 if (flag & SOR_ZERO_INITIAL_GUESS) { 807 return MatRelax(mat->A,bb,omega,flag,shift,its,xx); 808 } 809 ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx); 810 CHKERR(ierr); 811 ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx); 812 CHKERR(ierr); 813 while (its--) { 814 /* go down through the rows */ 815 for ( i=0; i<m; i++ ) { 816 n = A->i[i+1] - A->i[i]; 817 idx = A->j + A->i[i] - 1; 818 v = A->a + A->i[i] - 1; 819 sum = b[i]; 820 SPARSEDENSEMDOT(sum,xs,v,idx,n); 821 d = shift + A->a[diag[i]-1]; 822 n = B->i[i+1] - B->i[i]; 823 idx = B->j + B->i[i] - 1; 824 v = B->a + B->i[i] - 1; 825 SPARSEDENSEMDOT(sum,ls,v,idx,n); 826 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 827 } 828 /* come up through the rows */ 829 for ( i=m-1; i>-1; i-- ) { 830 n = A->i[i+1] - A->i[i]; 831 idx = A->j + A->i[i] - 1; 832 v = A->a + A->i[i] - 1; 833 sum = b[i]; 834 SPARSEDENSEMDOT(sum,xs,v,idx,n); 835 d = shift + A->a[diag[i]-1]; 836 n = B->i[i+1] - B->i[i]; 837 idx = B->j + B->i[i] - 1; 838 v = B->a + B->i[i] - 1; 839 SPARSEDENSEMDOT(sum,ls,v,idx,n); 840 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 841 } 842 } 843 } 844 else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 845 if (flag & SOR_ZERO_INITIAL_GUESS) { 846 return MatRelax(mat->A,bb,omega,flag,shift,its,xx); 847 } 848 ierr=VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx); 849 CHKERR(ierr); 850 ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll,mat->Mvctx); 851 CHKERR(ierr); 852 while (its--) { 853 for ( i=0; i<m; i++ ) { 854 n = A->i[i+1] - A->i[i]; 855 idx = A->j + A->i[i] - 1; 856 v = A->a + A->i[i] - 1; 857 sum = b[i]; 858 SPARSEDENSEMDOT(sum,xs,v,idx,n); 859 d = shift + A->a[diag[i]-1]; 860 n = B->i[i+1] - B->i[i]; 861 idx = B->j + B->i[i] - 1; 862 v = B->a + B->i[i] - 1; 863 SPARSEDENSEMDOT(sum,ls,v,idx,n); 864 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 865 } 866 } 867 } 868 else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 869 if (flag & SOR_ZERO_INITIAL_GUESS) { 870 return MatRelax(mat->A,bb,omega,flag,shift,its,xx); 871 } 872 ierr = VecScatterBegin(xx,0,mat->lvec,0,InsertValues,ScatterAll, 873 mat->Mvctx); CHKERR(ierr); 874 ierr = VecScatterEnd(xx,0,mat->lvec,0,InsertValues,ScatterAll, 875 mat->Mvctx); CHKERR(ierr); 876 while (its--) { 877 for ( i=m-1; i>-1; i-- ) { 878 n = A->i[i+1] - A->i[i]; 879 idx = A->j + A->i[i] - 1; 880 v = A->a + A->i[i] - 1; 881 sum = b[i]; 882 SPARSEDENSEMDOT(sum,xs,v,idx,n); 883 d = shift + A->a[diag[i]-1]; 884 n = B->i[i+1] - B->i[i]; 885 idx = B->j + B->i[i] - 1; 886 v = B->a + B->i[i] - 1; 887 SPARSEDENSEMDOT(sum,ls,v,idx,n); 888 x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 889 } 890 } 891 } 892 return 0; 893 } 894 static int MatInsOpt_MPIAIJ(Mat aijin,int op) 895 { 896 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 897 898 if (op == NO_NEW_NONZERO_LOCATIONS) { 899 MatSetOption(aij->A,op); 900 MatSetOption(aij->B,op); 901 } 902 else if (op == YES_NEW_NONZERO_LOCATIONS) { 903 MatSetOption(aij->A,op); 904 MatSetOption(aij->B,op); 905 } 906 else if (op == COLUMN_ORIENTED) SETERR(1,"Column oriented not supported"); 907 return 0; 908 } 909 910 static int MatSize_MPIAIJ(Mat matin,int *m,int *n) 911 { 912 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 913 *m = mat->M; *n = mat->N; 914 return 0; 915 } 916 917 static int MatLocalSize_MPIAIJ(Mat matin,int *m,int *n) 918 { 919 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 920 *m = mat->m; *n = mat->n; 921 return 0; 922 } 923 924 static int MatRange_MPIAIJ(Mat matin,int *m,int *n) 925 { 926 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 927 *m = mat->rstart; *n = mat->rend; 928 return 0; 929 } 930 931 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 932 { 933 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) matin->data; 934 Mat A = aij->A, B = aij->B; 935 Mat_AIJ *Ad = (Mat_AIJ *)(A->data), *Bd = (Mat_AIJ *)(B->data); 936 Scalar *vworkA, *vworkB; 937 int ierr, *cworkA, *cworkB; 938 int nztot, nzA, nzB, i, rstart = aij->rstart, rend = aij->rend; 939 int cstart = aij->cstart; 940 941 if (!aij->assembled) 942 SETERR(1,"MatGetRow_MPIAIJ: Must assemble matrix first."); 943 if (row < rstart || row >= rend) 944 SETERR(1,"MatGetRow_MPIAIJ: Currently you can get only the local rows.") 945 946 ierr = MatGetRow(A,row,&nzA,&cworkA,&vworkA); CHKERR(ierr); 947 for (i=0; i<nzA; i++) cworkA[i] += cstart; 948 ierr = MatGetRow(B,row,&nzB,&cworkB,&vworkB); CHKERR(ierr); 949 for (i=0; i<nzB; i++) { 950 printf("i=%d, invcolmap[i]=%d, cwork=%d, vwork=%g\n", 951 i, aij->invcolmap[i], cworkB[i], vworkB[i] ); 952 /* cworkB[i] = aij->invcolmap[cworkB[i]]; */ 953 } 954 955 if (nztot = nzA + nzB) { 956 *idx = (int *) MALLOC( (nztot)*sizeof(int) ); CHKPTR(*idx); 957 *v = (Scalar *) MALLOC( (nztot)*sizeof(Scalar) ); CHKPTR(*v); 958 for ( i=0; i<nzA; i++ ) { 959 (*idx)[i] = cworkA[i]; 960 (*v)[i] = vworkA[i]; 961 } 962 for ( i=0; i<nzB; i++ ) { 963 (*idx)[i+nzA] = cworkB[i]; 964 (*v)[i] = vworkB[i]; 965 } 966 } 967 else {*idx = 0; *v=0;} 968 *nz = nztot; 969 ierr = MatRestoreRow(A,row,&nzA,&cworkA,&vworkA); CHKERR(ierr); 970 ierr = MatRestoreRow(B,row,&nzB,&cworkB,&vworkB); CHKERR(ierr); 971 return 0; 972 } 973 974 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 975 { 976 if (*idx) FREE(*idx); 977 if (*v) FREE(*v); 978 return 0; 979 } 980 981 static int MatCopy_MPIAIJ(Mat,Mat *); 982 extern int MatConvert_MPIAIJ(Mat,MATTYPE,Mat *); 983 984 /* -------------------------------------------------------------------*/ 985 static struct _MatOps MatOps = {MatInsertValues_MPIAIJ, 986 MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 987 MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 988 MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 989 0,0,0,0, 990 0,0, 991 MatRelax_MPIAIJ, 992 0, 993 0,0,0, 994 MatCopy_MPIAIJ, 995 MatGetDiag_MPIAIJ,0,0, 996 MatBeginAssemble_MPIAIJ,MatEndAssemble_MPIAIJ, 997 0, 998 MatInsOpt_MPIAIJ,MatZero_MPIAIJ,MatZeroRows_MPIAIJ,0, 999 0,0,0,0, 1000 MatSize_MPIAIJ,MatLocalSize_MPIAIJ,MatRange_MPIAIJ, 1001 0,0, 1002 0,MatConvert_MPIAIJ }; 1003 1004 /*@ 1005 1006 MatCreateMPIAIJ - Creates a sparse parallel matrix 1007 in AIJ format. 1008 1009 Input Parameters: 1010 . comm - MPI communicator 1011 . m,n - number of local rows and columns (or -1 to have calculated) 1012 . M,N - global rows and columns (or -1 to have calculated) 1013 . d_nz - total number nonzeros in diagonal portion of matrix 1014 . d_nzz - number of nonzeros per row in diagonal portion of matrix or null 1015 . You must leave room for the diagonal entry even if it is zero. 1016 . o_nz - total number nonzeros in off-diagonal portion of matrix 1017 . o_nzz - number of nonzeros per row in off-diagonal portion of matrix 1018 . or null. You must have at least one nonzero per row. 1019 1020 Output parameters: 1021 . newmat - the matrix 1022 1023 Keywords: matrix, aij, compressed row, sparse, parallel 1024 @*/ 1025 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 1026 int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat) 1027 { 1028 Mat mat; 1029 Mat_MPIAIJ *aij; 1030 int ierr, i,sum[2],work[2]; 1031 *newmat = 0; 1032 PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1033 PLogObjectCreate(mat); 1034 mat->data = (void *) (aij = NEW(Mat_MPIAIJ)); CHKPTR(aij); 1035 mat->ops = &MatOps; 1036 mat->destroy = MatDestroy_MPIAIJ; 1037 mat->view = MatView_MPIAIJ; 1038 mat->factor = 0; 1039 mat->row = 0; 1040 mat->col = 0; 1041 1042 mat->comm = comm; 1043 aij->insertmode = NotSetValues; 1044 MPI_Comm_rank(comm,&aij->mytid); 1045 MPI_Comm_size(comm,&aij->numtids); 1046 1047 if (M == -1 || N == -1) { 1048 work[0] = m; work[1] = n; 1049 MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm ); 1050 if (M == -1) M = sum[0]; 1051 if (N == -1) N = sum[1]; 1052 } 1053 if (m == -1) {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);} 1054 if (n == -1) {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);} 1055 aij->m = m; 1056 aij->n = n; 1057 aij->N = N; 1058 aij->M = M; 1059 1060 /* build local table of row and column ownerships */ 1061 aij->rowners = (int *) MALLOC(2*(aij->numtids+2)*sizeof(int)); 1062 CHKPTR(aij->rowners); 1063 aij->cowners = aij->rowners + aij->numtids + 1; 1064 MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm); 1065 aij->rowners[0] = 0; 1066 for ( i=2; i<=aij->numtids; i++ ) { 1067 aij->rowners[i] += aij->rowners[i-1]; 1068 } 1069 aij->rstart = aij->rowners[aij->mytid]; 1070 aij->rend = aij->rowners[aij->mytid+1]; 1071 MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm); 1072 aij->cowners[0] = 0; 1073 for ( i=2; i<=aij->numtids; i++ ) { 1074 aij->cowners[i] += aij->cowners[i-1]; 1075 } 1076 aij->cstart = aij->cowners[aij->mytid]; 1077 aij->cend = aij->cowners[aij->mytid+1]; 1078 1079 1080 ierr = MatCreateSequentialAIJ(m,n,d_nz,d_nnz,&aij->A); CHKERR(ierr); 1081 PLogObjectParent(mat,aij->A); 1082 ierr = MatCreateSequentialAIJ(m,N,o_nz,o_nnz,&aij->B); CHKERR(ierr); 1083 PLogObjectParent(mat,aij->B); 1084 1085 /* build cache for off array entries formed */ 1086 aij->stash.nmax = CHUNCKSIZE; /* completely arbratray number */ 1087 aij->stash.n = 0; 1088 aij->stash.array = (Scalar *) MALLOC( aij->stash.nmax*(2*sizeof(int) + 1089 sizeof(Scalar))); CHKPTR(aij->stash.array); 1090 aij->stash.idx = (int *) (aij->stash.array + aij->stash.nmax); 1091 aij->stash.idy = (int *) (aij->stash.idx + aij->stash.nmax); 1092 aij->colmap = 0; 1093 aij->garray = 0; 1094 1095 /* stuff used for matrix vector multiply */ 1096 aij->lvec = 0; 1097 aij->Mvctx = 0; 1098 aij->assembled = 0; 1099 1100 *newmat = mat; 1101 return 0; 1102 } 1103 1104 static int MatCopy_MPIAIJ(Mat matin,Mat *newmat) 1105 { 1106 Mat mat; 1107 Mat_MPIAIJ *aij,*oldmat = (Mat_MPIAIJ *) matin->data; 1108 int ierr; 1109 *newmat = 0; 1110 1111 if (!oldmat->assembled) SETERR(1,"Cannot copy unassembled matrix"); 1112 PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1113 PLogObjectCreate(mat); 1114 mat->data = (void *) (aij = NEW(Mat_MPIAIJ)); CHKPTR(aij); 1115 mat->ops = &MatOps; 1116 mat->destroy = MatDestroy_MPIAIJ; 1117 mat->view = MatView_MPIAIJ; 1118 mat->factor = matin->factor; 1119 mat->row = 0; 1120 mat->col = 0; 1121 1122 aij->m = oldmat->m; 1123 aij->n = oldmat->n; 1124 aij->M = oldmat->M; 1125 aij->N = oldmat->N; 1126 1127 aij->assembled = 1; 1128 aij->rstart = oldmat->rstart; 1129 aij->rend = oldmat->rend; 1130 aij->cstart = oldmat->cstart; 1131 aij->cend = oldmat->cend; 1132 aij->numtids = oldmat->numtids; 1133 aij->mytid = oldmat->mytid; 1134 aij->insertmode = NotSetValues; 1135 1136 aij->rowners = (int *) MALLOC( (aij->numtids+1)*sizeof(int) ); 1137 CHKPTR(aij->rowners); 1138 MEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int)); 1139 aij->stash.nmax = 0; 1140 aij->stash.n = 0; 1141 aij->stash.array= 0; 1142 aij->colmap = 0; 1143 aij->garray = 0; 1144 mat->comm = matin->comm; 1145 1146 ierr = VecCreate(oldmat->lvec,&aij->lvec); CHKERR(ierr); 1147 PLogObjectParent(mat,aij->lvec); 1148 ierr = VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERR(ierr); 1149 PLogObjectParent(mat,aij->Mvctx); 1150 ierr = MatCopy(oldmat->A,&aij->A); CHKERR(ierr); 1151 PLogObjectParent(mat,aij->A); 1152 ierr = MatCopy(oldmat->B,&aij->B); CHKERR(ierr); 1153 PLogObjectParent(mat,aij->B); 1154 *newmat = mat; 1155 return 0; 1156 } 1157