#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.57 1995/07/12 03:04:20 curfman Exp curfman $";
#endif

#include "mpiaij.h"
#include "vec/vecimpl.h"
#include "inline/spops.h"

/* local utility routine that creates a mapping from the global column 
   number to the local number in the off-diagonal part of the local 
   storage of the matrix. This is done in a non scable way since the 
   length of colmap equals the global matrix length. 

   Entries are stored shifted by +1 so that colmap[g] == 0 means
   "global column g is not present in the off-diagonal part". */
static int CreateColmap_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_AIJ    *B   = (Mat_AIJ*) aij->B->data;
  int        n = B->n, i;
  aij->colmap = (int *) PETSCMALLOC(aij->N*sizeof(int)); CHKPTRQ(aij->colmap);
  PETSCMEMSET(aij->colmap,0,aij->N*sizeof(int));
  /* garray[i] is the global column number of local off-diagonal column i */
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  return 0;
}

extern int DisAssemble_MPIAIJ(Mat);

/* Reordering is delegated to the sequential (diagonal) block; only valid
   when running on a single processor. */
static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->numtids == 1) {
    ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
  } else
    SETERRQ(1,"MatGetReordering_MPIAIJ: not yet supported in parallel.");
  return 0;
}

/* Insert/add an m-by-n dense block of values (row indices idxm, column
   indices idxn, values v stored by rows).  Locally owned rows go directly
   into the diagonal (A) or off-diagonal (B) sequential blocks; rows owned
   by other processors are stashed and communicated at assembly time.
   Mixing INSERTVALUES and ADDVALUES between assemblies is an error. */
static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
                               int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  if (aij->insertmode != NOTSETVALUES && aij->insertmode != addv) {
    SETERRQ(1,"You cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"Negative row index");
    if (idxm[i] >= aij->M) SETERRQ(1,"Row index too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      /* locally owned row */
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"Negative column index");
        if (idxn[j] >= aij->N) SETERRQ(1,"Column index too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* diagonal block: store with local column number */
          col = idxn[j] - cstart;
          ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
        else {
          if (aij->assembled) {
            /* after assembly B is compacted: translate global column to
               local off-diagonal column via colmap (shifted by one) */
            if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
            col = aij->colmap[idxn[j]] - 1;
            /* col < 0 means a brand new nonzero location; unless new
               nonzeros are disallowed we must disassemble and restart
               with global column numbers */
            if (col < 0 && !((Mat_AIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = idxn[j];
            }
          }
          else col = idxn[j];
          ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      /* row owned by another processor: cache until assembly */
      ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);
      CHKERRQ(ierr);
    }
  }
  return 0;
}

/*
    the assembly code is a lot like the code for vectors, we should 
  sometime derive a single assembly code that can be used for 
  either case.
*/

/* Begin assembly: ship all stashed (off-processor) entries to their
   owners.  Posts the nonblocking receives and sends; the matching waits
   happen in MatAssemblyEnd_MPIAIJ.  Each entry travels as a triple
   (row, col, value), with the indices cast to Scalar so a single
   MPIU_SCALAR message type suffices. */
static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         numtids = aij->numtids, *owners = aij->rowners;
  int         mytid = aij->mytid;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int         tag = mat->tag, *owner,*starts,count,ierr;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT,
                MPI_BOR,comm);
  if (addv == (ADDVALUES|INSERTVALUES)) {
    SETERRQ(1,"Some processors have inserted while others have added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
  PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
  owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<numtids; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
  nreceives = work[mytid];
  MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
  nmax = work[mytid];
  PETSCFREE(work);

  /* post receives: 
       1) each message will consist of ordered pairs 
     (global index,value) we store the global index as a double 
     to simplify the message passing. 
       2) since we don't know how long each individual message is we 
     allocate the largest needed buffer for each receive. Potentially 
     this is a lot of wasted space. 

     This could be done better. */
  rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends: 
      1) starts[i] gives the starting index in svalues for stuff going to 
         the ith processor */
  svalues = (Scalar *) PETSCMALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) );
  CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack triples; note starts[] is advanced as each entry is packed */
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PETSCFREE(owner);
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<numtids; i++ ) {
    if (procs[i]) {
      MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PETSCFREE(starts); PETSCFREE(nprocs);

  /* Free cache space */
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  /* stash the communication state for MatAssemblyEnd_MPIAIJ */
  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
extern int MatSetUpMultiply_MPIAIJ(Mat);

/* Complete the assembly begun in MatAssemblyBegin_MPIAIJ: drain the posted
   receives, insert the received (row,col,value) triples into the local A/B
   blocks, wait on the sends, free the communication buffers, and assemble
   the two sequential blocks.  Also synchronizes the disassembled state
   across processors so all processors reassemble consistently. */
static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  int         ierr;
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n;
  int         row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = aij->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;  /* n was a count of Scalars; each entry is a triple */
    for ( i=0; i<n; i++ ) {
      row = (int) PETSCREAL(values[3*i]) - aij->rstart;
      col = (int) PETSCREAL(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (aij->assembled) {
          /* same colmap translation/disassemble logic as MatSetValues */
          if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
          col = aij->colmap[col] - 1;
          if (col < 0 && !((Mat_AIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PETSCREAL(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PETSCMALLOC( aij->nsends*sizeof(MPI_Status) );
    CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PETSCFREE(send_status);
  }
  PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues);

  aij->insertmode = NOTSETVALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must 
     also disassemble ourselfs, in order that we may reassemble. 
     (MPI_PROD of the assembled flags is zero iff someone disassembled) */
  MPI_Allreduce((void *) &aij->assembled,(void *) &other_disassembled,1,
                MPI_INT,MPI_PROD,mat->comm);
  if (aij->assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!aij->assembled && mode == FINAL_ASSEMBLY) {
    /* builds lvec, Mvctx and the compacted off-diagonal block */
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
    aij->assembled = 1;
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  return 0;
}

/* Zero all stored entries of both local blocks (sparsity unchanged). */
static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* again this uses the same basic stratagy as in the assembly and 
   scatter create routines, we should try to do it systemamatically 
   if we can figure out the proper level of generality. */

/* the code does not do the diagonal entries correctly unless the 
   matrix is square and the column and row owerships are identical. 
   This is a BUG. The only way to fix it seems to be to access 
   aij->A and aij->B directly and not through the MatZeroRows() 
   routine. 
*/
/* Zero the (globally numbered) rows listed in is; if diag is non-null the
   diagonal entry of each zeroed row is set to *diag.  Row indices may
   refer to rows owned by other processors, so the routine first routes
   each index to its owner (same send/receive pattern as assembly) and
   then zeroes the locally owned rows through the sequential blocks.
   See the BUG note above regarding diagonal entries. */
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  if (!l->assembled)
    SETERRQ(1,"MatZeroRows_MPIAIJ: Must assemble matrix first");
  ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
  PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
  owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<numtids; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"Index out of range.");
  }
  nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
  nrecvs = work[mytid];
  MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
  nmax = work[mytid];
  PETSCFREE(work);

  /* post receives: */
  rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends: 
      1) starts[i] gives the starting index in svalues for stuff going to 
         the ith processor */
  svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PETSCMALLOC( (numtids+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<numtids; i++ ) {
    if (procs[i]) {
      MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag,
                comm,send_waits+count++);
    }
  }
  PETSCFREE(starts);

  base = owners[mytid];

  /* wait on receives */
  lens = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PETSCFREE(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;  /* convert to local row number */
    }
  }
  PETSCFREE(rvalues); PETSCFREE(lens);
  PETSCFREE(owner); PETSCFREE(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSequential(MPI_COMM_SELF,slen,lrows,&istmp);
  CHKERRQ(ierr); PETSCFREE(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PETSCMALLOC( nsends*sizeof(MPI_Status) );
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PETSCFREE(send_status);
  }
  PETSCFREE(send_waits); PETSCFREE(svalues);

  return 0;
}

/* yy = mat*xx.  The scatter of off-processor entries of xx into lvec is
   overlapped with the diagonal-block multiply. */
static int MatMult_MPIAIJ(Mat aijin,Vec xx,Vec yy)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
  int        ierr;
  if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ: must assemble matrix first");
  ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
  CHKERRQ(ierr);
  ierr = MatMult(aij->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
  CHKERRQ(ierr);
  ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}

/* zz = mat*xx + yy, with the same communication overlap as MatMult. */
static int MatMultAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
  int        ierr;
  if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ: must assemble matrix first");
  ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
  CHKERRQ(ierr);
  ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
  CHKERRQ(ierr);
  ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERRQ(ierr);
  return 0;
}

/* yy = mat' * xx.  The off-diagonal contribution B'*xx is computed into
   lvec and scattered (in reverse, with ADDVALUES) back into yy while the
   local A'*xx is computed. */
static int MatMultTrans_MPIAIJ(Mat aijin,Vec xx,Vec yy)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
  int        ierr;

  if (!aij->assembled)
    SETERRQ(1,"MatMulTrans_MPIAIJ: must assemble matrix first");
  /* do nondiagonal part */
  ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(aij->lvec,yy,ADDVALUES,
                         (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = MatMultTrans(aij->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(aij->lvec,yy,ADDVALUES,
                       (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
  return 0;
}

/* zz = mat' * xx + yy; same structure as MatMultTrans_MPIAIJ. */
static int MatMultTransAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
  int        ierr;

  if (!aij->assembled)
    SETERRQ(1,"MatMulTransAdd_MPIAIJ: must assemble matrix first");
  /* do nondiagonal part */
  ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(aij->lvec,zz,ADDVALUES,
                         (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(aij->lvec,zz,ADDVALUES,
                       (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
  return 0;
}

/*
  This only works correctly for square matrices where the subblock A->A is the 
  diagonal block
*/
static int MatGetDiagonal_MPIAIJ(Mat Ain,Vec v)
{
  Mat_MPIAIJ *A = (Mat_MPIAIJ *) Ain->data;
  if (!A->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ: must assemble matrix first");
  return MatGetDiagonal(A->A,v);
}

/* Destroy the parallel matrix: both sequential blocks, the column map,
   the global-column array, the local work vector and its scatter context,
   then the header itself. */
static int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows %d Cols %d",aij->M,aij->N);
#endif
  PETSCFREE(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PETSCFREE(aij->colmap);
  if (aij->garray) PETSCFREE(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterCtxDestroy(aij->Mvctx);
  PETSCFREE(aij);
  PLogObjectDestroy(mat);
  PETSCHEADERDESTROY(mat);
  return 0;
}
#include "draw.h"
#include "pviewer.h"

/* View the parallel matrix.  For an ordinary file viewer each processor
   prints its own blocks in sequence; for a single-file or draw viewer the
   whole matrix is gathered onto processor 0 (via a temporary parallel
   matrix with all rows on processor 0) and viewed there.  Note the
   temporary +/- (cstart-1) shift of the column indices of the A block:
   the sequential AIJ format stores 1-based indices. */
static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat         mat = (Mat) obj;
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  int         ierr;
  PetscObject vobj = (PetscObject) viewer;

  if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ: must assemble matrix first");
  if (!viewer) { /* so that viewers may be used from debuggers */
    viewer = STDOUT_VIEWER; vobj = (PetscObject) viewer;
  }
  if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
  if (vobj->cookie == VIEWER_COOKIE && vobj->type == FILE_VIEWER) {
    FILE *fd = ViewerFileGetPointer_Private(viewer);
    MPIU_Seq_begin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
            aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    MPIU_Seq_end(mat->comm,1);
  }
  else if ((vobj->cookie == VIEWER_COOKIE && vobj->type == FILES_VIEWER) ||
           vobj->cookie == DRAW_COOKIE) {
    int numtids = aij->numtids, mytid = aij->mytid;
    if (numtids == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat     A;
      Mat_AIJ *Aloc;
      int     M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar  *a;

      if (!mytid) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);
      }
      CHKERRQ(ierr);

      /* copy over the A part */
      Aloc = (Mat_AIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      /* shift 1-based local column numbers to 0-based global numbers */
      for ( i=0; i<ai[m]-1; i++ ) {aj[i] += aij->cstart - 1;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERTVALUES);
        CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      /* undo the shift, restoring the stored indices */
      for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= aij->cstart - 1;}

      /* copy over the B part */
      Aloc = (Mat_AIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      /* translate compacted off-diagonal columns back to global numbers */
      for ( i=0; i<ai[m]-1; i++ ) {cols[i] = aij->garray[aj[i]-1];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERTVALUES);
        CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PETSCFREE(ct);
      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!mytid) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

extern int MatMarkDiag_AIJ(Mat_AIJ *);
/*
   This has to provide several versions. 

    1) per sequential 
    2) a) use only local smoothing updating outer values only once. 
       b) local smoothing updating outer values each inner iteration 
    3) color updating out values betwen colors. 
*/
/* SOR/SSOR relaxation sweeps (and the Eisenstat-trick variant) for the
   parallel AIJ matrix.  xs and ls are the solution and local work arrays
   shifted by one because the sequential AIJ blocks store 1-based column
   indices (Fortran-style); SPARSEDENSEMDOT accumulates -sum(v[k]*x[idx[k]])
   style dot products over a sparse row (see inline/spops.h). */
static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                           double shift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_AIJ    *A = (Mat_AIJ *) AA->data, *B = (Mat_AIJ *)BB->data;
  Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i;
  Vec        tt;

  if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ: must assemble matrix first");

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x -1; /* shift by one for index start of 1 */
  ls--;
  if (!A->diag) {if ((ierr = MatMarkDiag_AIJ(A))) return ierr;}
  diag = A->diag;
  if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
    SETERRQ(1,"That option not yet support for parallel AIJ matrices");
  }
  if (flag & SOR_EISENSTAT) {
    /* Let A = L + U + D; where L is lower trianglar, 
       U is upper triangular, E is diagonal; This routine applies 

            (L + E)^{-1} A (U + E)^{-1} 

       to a vector efficiently using Eisenstat's trick. This is for 
       the case of SSOR preconditioner, so E is D/omega where omega 
       is the relaxation factor. 
    */
    ierr  = VecDuplicate(xx,&tt); CHKERRQ(ierr);
    VecGetArray(tt,&t);
    scale = (2.0/omega) - 1.0;
    /* x = (E + U)^{-1} b */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                            mat->Mvctx); CHKERRQ(ierr);
    for ( i=m-1; i>-1; i-- ) {
      /* strictly upper-triangular part of row i of A */
      n   = A->i[i+1] - diag[i] - 1;
      idx = A->j + diag[i];
      v   = A->a + diag[i];
      sum = b[i];
      SPARSEDENSEMDOT(sum,xs,v,idx,n);
      d   = shift + A->a[diag[i]-1];
      /* whole row i of the off-diagonal block B */
      n   = B->i[i+1] - B->i[i];
      idx = B->j + B->i[i] - 1;
      v   = B->a + B->i[i] - 1;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      x[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                          mat->Mvctx); CHKERRQ(ierr);

    /* t = b - (2*E - D)x */
    v = A->a;
    /* note: diag is advanced here and reset just below */
    for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; }

    /* t = (E + L)^{-1}t */
    ts = t - 1; /* shifted by one for index start of a or mat->j*/
    diag = A->diag;
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    for ( i=0; i<m; i++ ) {
      /* strictly lower-triangular part of row i of A */
      n   = diag[i] - A->i[i];
      idx = A->j + A->i[i] - 1;
      v   = A->a + A->i[i] - 1;
      sum = t[i];
      SPARSEDENSEMDOT(sum,ts,v,idx,n);
      d   = shift + A->a[diag[i]-1];
      n   = B->i[i+1] - B->i[i];
      idx = B->j + B->i[i] - 1;
      v   = B->a + B->i[i] - 1;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      t[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                          mat->Mvctx); CHKERRQ(ierr);
    /* x = x + t */
    for ( i=0; i<m; i++ ) { x[i] += t[i]; }
    VecDestroy(tt);
    return 0;
  }

  if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec); VecSet(&zero,xx);
    }
    else {
      ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
      CHKERRQ(ierr);
    }
    while (its--) {
      /* go down through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      /* come up through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero guess the first sweep only needs the lower triangle */
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n   = diag[i] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - diag[i] - 1;
        idx = A->j + diag[i];
        v   = A->a + diag[i];
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERDOWN,
                             mat->Mvctx); CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERDOWN,
                           mat->Mvctx); CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    /* "local" variants scatter the ghost values once, then sweep using
       only the local blocks (no per-iteration communication) */
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v   = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = shift + A->a[diag[i]-1];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v   = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  return 0;
}

/* Report nonzero / allocation / memory counts, either for this processor
   (MAT_LOCAL) or reduced over the communicator (MAT_GLOBAL_MAX/SUM). */
static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
                             int *nzalloc,int *mem)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;

  ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
  ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
  isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
  if (flag == MAT_LOCAL) {
    *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_MAX,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_SUM,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  }
  return 0;
}

extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);

/* Forward supported options to both sequential blocks; column-oriented
   value insertion is not supported for this format. */
static int MatSetOption_MPIAIJ(Mat aijin,MatOption op)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;

  if (op == NO_NEW_NONZERO_LOCATIONS) {
    MatSetOption(aij->A,op);
    MatSetOption(aij->B,op);
  }
  else if (op == YES_NEW_NONZERO_LOCATIONS) {
    MatSetOption(aij->A,op);
    MatSetOption(aij->B,op);
  }
  else if (op == COLUMN_ORIENTED) SETERRQ(1,"Column oriented not supported");
  return 0;
}

/* Global dimensions. */
static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

/* Local dimensions.
   NOTE(review): *n is set to the GLOBAL column count (mat->N), not the
   local mat->n as the name suggests — looks like a bug; confirm intent. */
static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->m; *n = mat->N;
  return 0;
}

/* Range [rstart,rend) of global rows owned by this processor. */
static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

/* Return one locally owned row (global numbering) merged from the A and B
   blocks, with columns in increasing order (assuming each block is
   already sorted).  idx and/or v may be null when the caller does not
   want that information; returned arrays are freed by MatRestoreRow.
   NOTE(review): the empty-row branch writes *idx and *v unconditionally,
   which dereferences null if only one of idx/v was requested — confirm. */
static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;

  if (row < rstart || row >= rend)
    SETERRQ(1,"MatGetRow_MPIAIJ: Currently you can get only local rows.")
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark;
      if (mat->assembled) {
        /* translate B's compacted columns to global numbers */
        for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
      }
      if (v) {
        *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
        /* B columns below cstart come first, then all of A, then the rest of B */
        for ( i=0; i<nzB; i++ ) {
          if (cworkB[i] < cstart) (*v)[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) (*v)[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
        for (i=0; i<nzA; i++) cworkA[i] += cstart;
        for ( i=0; i<nzB; i++ ) {
          if (cworkB[i] < cstart) (*idx)[i] = cworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
        for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i]   = cworkB[i];
      }
    }
    else {*idx = 0; *v=0;}
  }
  *nz = nztot;
  ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}

/* Free the arrays allocated by MatGetRow_MPIAIJ. */
static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  if (idx) PETSCFREE(*idx);
  if (v)   PETSCFREE(*v);
  return 0;
}

/* Build the transpose as a new parallel matrix by inserting each local
   row of A and B as a column of the result (note the swapped row/column
   arguments to MatSetValues and the same +/- (cstart-1) index shifting
   as in MatView).  [Definition continues beyond this chunk.] */
static int MatTranspose_MPIAIJ(Mat A,Mat *Bin)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  Mat        B;
  Mat_AIJ    *Aloc;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B);
  CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_AIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]-1; i++ ) {aj[i] += a->cstart - 1;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERTVALUES);
    CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= a->cstart - 1;}

  /* copy over the B part */
  Aloc = (Mat_AIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]-1; i++ ) {cols[i] = a->garray[aj[i]-1];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERTVALUES);
    CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PETSCFREE(ct);
  ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  *Bin =
B;
  return 0;
}

extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
static int MatCopyPrivate_MPIAIJ(Mat,Mat *);

/* -------------------------------------------------------------------*/
/* function table for the MPIAIJ type; entry order follows struct _MatOps,
   0 marks operations not implemented for this format */
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
       MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
       MatLUFactor_MPIAIJ,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,0,0,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       MatGetReordering_MPIAIJ,
       MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       MatILUFactorSymbolic_MPIAIJ,0,
       0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ};

/*@
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated
       if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated
       if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
       (same for all local rows)
.  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
       or null (possibly different for each row).  You must leave room
       for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in off-diagonal portion of local
       submatrix (same for all local rows).
.  
o_nnz - number of nonzeros per row in off-diagonal portion of local
       submatrix or null (possibly different for each row).

   Output Parameter:
.  newmat - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices begin at
   one, not zero.  See the users manual for further details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set both
   d_nz and d_nnz to zero for PETSc to control dynamic memory allocation.
   Likewise, specify preallocated storage for the off-diagonal part of
   the local submatrix with o_nz or o_nnz (not both).

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSequentialAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *aij;
  int        ierr, i,sum[2],work[2];
  *newmat      = 0;
  PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(mat);
  mat->data    = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij);
  mat->ops     = &MatOps;
  mat->destroy = MatDestroy_MPIAIJ;
  mat->view    = MatView_MPIAIJ;
  mat->factor  = 0;

  aij->insertmode = NOTSETVALUES;
  MPI_Comm_rank(comm,&aij->mytid);
  MPI_Comm_size(comm,&aij->numtids);

  /* determine any dimensions left as PETSC_DECIDE: global sizes are the
     sum of the local ones; local sizes split the global size evenly with
     the remainder spread over the lowest-ranked processors */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE)
    {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);}
  if (n == PETSC_DECIDE)
    {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);}
  aij->m = m;
  aij->n = n;
  aij->N = N;
  aij->M = M;

  /* build local table of row and column ownerships */
  aij->rowners = (int *) PETSCMALLOC(2*(aij->numtids+2)*sizeof(int));
  CHKPTRQ(aij->rowners);
  /* cowners shares the same allocation, offset past the row table */
  aij->cowners = aij->rowners + aij->numtids + 1;
  MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm);
  aij->rowners[0] = 0;
  /* prefix-sum the gathered local sizes into ownership offsets */
  for ( i=2; i<=aij->numtids; i++ ) {
    aij->rowners[i] += aij->rowners[i-1];
  }
  aij->rstart = aij->rowners[aij->mytid];
  aij->rend   = aij->rowners[aij->mytid+1];
  MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm);
  aij->cowners[0] = 0;
  for ( i=2; i<=aij->numtids; i++ ) {
    aij->cowners[i] += aij->cowners[i-1];
  }
  aij->cstart = aij->cowners[aij->mytid];
  aij->cend   = 
aij->cowners[aij->mytid+1];

  /* the diagonal block A is m x n local; the off-diagonal block B is kept
     m x N here (later code may compress it to the columns actually used) */
  ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&aij->A);
  CHKERRQ(ierr);
  PLogObjectParent(mat,aij->A);
  ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&aij->B);
  CHKERRQ(ierr);
  PLogObjectParent(mat,aij->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&aij->stash); CHKERRQ(ierr);
  aij->colmap = 0;
  aij->garray = 0;

  /* stuff used for matrix vector multiply */
  aij->lvec      = 0;
  aij->Mvctx     = 0;
  aij->assembled = 0;

  *newmat = mat;
  return 0;
}

/*
   MatCopyPrivate_MPIAIJ - Duplicates an assembled MPIAIJ matrix: copies
   sizes, ownership data, the column map, and converts both sequential
   blocks into the new matrix.  Fails on an unassembled source.
*/
static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *aij,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len;
  *newmat = 0;

  if (!oldmat->assembled) SETERRQ(1,"Cannot copy unassembled matrix");
  PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data    = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij);
  mat->ops     = &MatOps;
  mat->destroy = MatDestroy_MPIAIJ;
  mat->view    = MatView_MPIAIJ;
  mat->factor  = matin->factor;

  aij->m = oldmat->m;
  aij->n = oldmat->n;
  aij->M = oldmat->M;
  aij->N = oldmat->N;

  aij->assembled  = 1;
  aij->rstart     = oldmat->rstart;
  aij->rend       = oldmat->rend;
  aij->cstart     = oldmat->cstart;
  aij->cend       = oldmat->cend;
  aij->numtids    = oldmat->numtids;
  aij->mytid      = oldmat->mytid;
  aij->insertmode = NOTSETVALUES;

  /* NOTE(review): only numtids+1 row-ownership entries are allocated and
     copied here -- cowners is not duplicated, unlike in MatCreateMPIAIJ;
     confirm nothing reads cowners on a copied matrix */
  aij->rowners = (int *) PETSCMALLOC( (aij->numtids+1)*sizeof(int) );
  CHKPTRQ(aij->rowners);
  PETSCMEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int));
  ierr = StashInitialize_Private(&aij->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    /* colmap is global-length (N) -- see CreateColmap_Private */
    aij->colmap = (int *) PETSCMALLOC( (aij->N)*sizeof(int) );
    CHKPTRQ(aij->colmap);
    PETSCMEMCPY(aij->colmap,oldmat->colmap,(aij->N)*sizeof(int));
  }
else aij->colmap = 0;
  /* copy the local-to-global column map, sized by B's current column count */
  if (oldmat->garray && (len = ((Mat_AIJ *) (oldmat->B->data))->n)) {
    aij->garray = (int *) PETSCMALLOC(len*sizeof(int) ); CHKPTRQ(aij->garray);
    PETSCMEMCPY(aij->garray,oldmat->garray,len*sizeof(int));
  } else aij->garray = 0;

  /* duplicate the scatter machinery used for matrix-vector products */
  ierr = VecDuplicate(oldmat->lvec,&aij->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,aij->lvec);
  ierr = VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,aij->Mvctx);
  /* deep-copy both sequential blocks */
  ierr = MatConvert(oldmat->A,MATSAME,&aij->A); CHKERRQ(ierr);
  PLogObjectParent(mat,aij->A);
  ierr = MatConvert(oldmat->B,MATSAME,&aij->B); CHKERRQ(ierr);
  PLogObjectParent(mat,aij->B);
  *newmat = mat;
  return 0;
}