#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.59 1995/07/13 15:15:39 curfman Exp curfman $";
#endif

#include "mpiaij.h"
#include "vec/vecimpl.h"
#include "inline/spops.h"

/*
   CreateColmap_Private - Local utility routine that creates a mapping from
   the global column number to the local column number in the off-diagonal
   part (aij->B) of the local storage of the matrix.  This is done in a
   non-scalable way since the length of colmap equals the global number of
   matrix columns (aij->N).

   Entries are stored 1-based (colmap[g] == 0 means global column g is not
   present in the local off-diagonal block); callers subtract 1 after lookup.
*/
static int CreateColmap_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_AIJ    *B = (Mat_AIJ*) aij->B->data;  /* off-diagonal block */
  int        n = B->n,i;

  aij->colmap = (int *) PETSCMALLOC(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
  PETSCMEMSET(aij->colmap,0,aij->N*sizeof(int));
  /* garray[i] is the global column number of local off-diagonal column i;
     store i+1 so that 0 can serve as the "not present" marker */
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  return 0;
}

extern int DisAssemble_MPIAIJ(Mat);

/*
   MatGetReordering_MPIAIJ - Computes a reordering of the matrix.
   Only supported uniprocessor (falls through to the sequential AIJ code);
   errors out when run on more than one processor.
*/
static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->numtids == 1) {
    ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
  } else
    SETERRQ(1,"MatGetReordering_MPIAIJ: not yet supported in parallel.");
  return 0;
}

/*
   MatSetValues_MPIAIJ - Inserts or adds an m-by-n logically dense block of
   values (row-major in v) into the parallel matrix.

   Locally owned rows go directly into the diagonal block (aij->A, using
   shifted local column numbers) or the off-diagonal block (aij->B, using
   either local off-diagonal numbers via colmap when already assembled, or
   global column numbers before the first assembly).  Rows owned by other
   processors are stashed and communicated at assembly time.

   Mixing INSERTVALUES and ADDVALUES between assemblies is an error.
*/
static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
                               int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  if (aij->insertmode != NOTSETVALUES && aij->insertmode != addv) {
    SETERRQ(1,"You cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"Negative row index");
    if (idxm[i] >= aij->M) SETERRQ(1,"Row index too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      /* row is locally owned */
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"Negative column index");
        if (idxn[j] >= aij->N) SETERRQ(1,"Column index too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* column falls in the diagonal block */
          col = idxn[j] - cstart;
          ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
        else {
          if (aij->assembled) {
            /* after assembly B is compressed: translate the global column
               through colmap (built lazily); colmap entries are 1-based */
            if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
            col = aij->colmap[idxn[j]] - 1;
            /* col < 0 means a brand-new nonzero location: unless new
               nonzeros are forbidden, fall back to the unassembled
               (global column) representation */
            if (col < 0 && !((Mat_AIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = idxn[j];
            }
          }
          else col = idxn[j];
          ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      /* off-processor row: stash for communication during assembly */
      ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);
      CHKERRQ(ierr);
    }
  }
  return 0;
}

/*
    the assembly code is a lot like the code for vectors, we should
    sometime derive a single assembly code that can be used for
    either case.
*/

/*
   MatAssemblyBegin_MPIAIJ - Begins assembly: ships all stashed
   off-processor (row,col,value) triples to their owning processors using
   non-blocking MPI sends/receives.  The matching waits and unpacking
   happen in MatAssemblyEnd_MPIAIJ; the outstanding request handles and
   buffers are parked in the Mat_MPIAIJ structure in between.
*/
static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         numtids = aij->numtids, *owners = aij->rowners;
  int         mytid = aij->mytid;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int         tag = mat->tag, *owner,*starts,count,ierr;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT,
                MPI_BOR,comm);
  if (addv == (ADDVALUES|INSERTVALUES)) {
    SETERRQ(1,"Some processors have inserted while others have added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
  PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
  owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    /* find the owner of global row idx from the row-ownership ranges */
    for ( j=0; j<numtids; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
  nreceives = work[mytid];
  MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
  nmax = work[mytid];
  PETSCFREE(work);

  /* post receives:
       1) each message will consist of ordered triples
          (global row,global column,value); the indices are stored as
          Scalars to simplify the message passing.
       2) since we don't know how long each individual message is we
          allocate the largest needed buffer for each receive. Potentially
          this is a lot of wasted space.

     This could be done better.
  */
  rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PETSCMALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) );
  CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack triples; starts[] is consumed (incremented) by the packing pass */
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
  }
  PETSCFREE(owner);
  /* rebuild starts[], then launch one send per destination processor */
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<numtids; i++ ) {
    if (procs[i]) {
      MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PETSCFREE(starts); PETSCFREE(nprocs);

  /* Free cache space */
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  /* stash the communication state for MatAssemblyEnd_MPIAIJ */
  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
extern int MatSetUpMultiply_MPIAIJ(Mat);

/*
   MatAssemblyEnd_MPIAIJ - Completes assembly: waits for the messages
   posted in MatAssemblyBegin_MPIAIJ, inserts the received triples into the
   local A (diagonal) or B (off-diagonal) block, assembles both blocks, and
   on the first FINAL_ASSEMBLY builds the scatter context used by the
   matrix-vector products.
*/
static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  int        ierr;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status *send_status,recv_status;
  int        imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n;
  int        row,col,other_disassembled;
  Scalar     *values,val;
  InsertMode addv = aij->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;  /* number of (row,col,value) triples in this message */
    for ( i=0; i<n; i++ ) {
      row = (int) PETSCREAL(values[3*i]) - aij->rstart;
      col = (int) PETSCREAL(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (aij->assembled) {
          /* same colmap translation / disassemble fallback as in
             MatSetValues_MPIAIJ */
          if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
          col = aij->colmap[col] - 1;
          if (col < 0 && !((Mat_AIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PETSCREAL(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PETSCMALLOC( aij->nsends*sizeof(MPI_Status) );
    CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PETSCFREE(send_status);
  }
  PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues);

  aij->insertmode = NOTSETVALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  MPI_Allreduce((void *) &aij->assembled,(void *) &other_disassembled,1,
                MPI_INT,MPI_PROD,mat->comm);
  if (aij->assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!aij->assembled && mode == FINAL_ASSEMBLY) {
    /* first final assembly: build lvec/Mvctx and compress B */
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
    aij->assembled = 1;
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  return 0;
}

/*
   MatZeroEntries_MPIAIJ - Zeros all entries (retaining the nonzero
   structure) of both the diagonal and off-diagonal local blocks.
*/
static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* again this uses the same basic stratagy as in the assembly and
   scatter create routines, we should try to do it systemamatically
   if we can figure out the proper level of generality. */

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row owerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
281 */ 282 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 283 { 284 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 285 int i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids; 286 int *procs,*nprocs,j,found,idx,nsends,*work; 287 int nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid; 288 int *rvalues,tag = A->tag,count,base,slen,n,*source; 289 int *lens,imdex,*lrows,*values; 290 MPI_Comm comm = A->comm; 291 MPI_Request *send_waits,*recv_waits; 292 MPI_Status recv_status,*send_status; 293 IS istmp; 294 295 if (!l->assembled) 296 SETERRQ(1,"MatZeroRows_MPIAIJ: Must assemble matrix first"); 297 ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 298 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 299 300 /* first count number of contributors to each processor */ 301 nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs); 302 PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids; 303 owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 304 for ( i=0; i<N; i++ ) { 305 idx = rows[i]; 306 found = 0; 307 for ( j=0; j<numtids; j++ ) { 308 if (idx >= owners[j] && idx < owners[j+1]) { 309 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 310 } 311 } 312 if (!found) SETERRQ(1,"Index out of range."); 313 } 314 nsends = 0; for ( i=0; i<numtids; i++ ) { nsends += procs[i];} 315 316 /* inform other processors of number of messages and max length*/ 317 work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work); 318 MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm); 319 nrecvs = work[mytid]; 320 MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm); 321 nmax = work[mytid]; 322 PETSCFREE(work); 323 324 /* post receives: */ 325 rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 326 CHKPTRQ(rvalues); 327 recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request)); 328 CHKPTRQ(recv_waits); 329 for ( i=0; i<nrecvs; i++ ) { 330 
MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag, 331 comm,recv_waits+i); 332 } 333 334 /* do sends: 335 1) starts[i] gives the starting index in svalues for stuff going to 336 the ith processor 337 */ 338 svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 339 send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request)); 340 CHKPTRQ(send_waits); 341 starts = (int *) PETSCMALLOC( (numtids+1)*sizeof(int) ); CHKPTRQ(starts); 342 starts[0] = 0; 343 for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 344 for ( i=0; i<N; i++ ) { 345 svalues[starts[owner[i]]++] = rows[i]; 346 } 347 ISRestoreIndices(is,&rows); 348 349 starts[0] = 0; 350 for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 351 count = 0; 352 for ( i=0; i<numtids; i++ ) { 353 if (procs[i]) { 354 MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag, 355 comm,send_waits+count++); 356 } 357 } 358 PETSCFREE(starts); 359 360 base = owners[mytid]; 361 362 /* wait on receives */ 363 lens = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 364 source = lens + nrecvs; 365 count = nrecvs; slen = 0; 366 while (count) { 367 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 368 /* unpack receives into our local space */ 369 MPI_Get_count(&recv_status,MPI_INT,&n); 370 source[imdex] = recv_status.MPI_SOURCE; 371 lens[imdex] = n; 372 slen += n; 373 count--; 374 } 375 PETSCFREE(recv_waits); 376 377 /* move the data into the send scatter */ 378 lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 379 count = 0; 380 for ( i=0; i<nrecvs; i++ ) { 381 values = rvalues + i*nmax; 382 for ( j=0; j<lens[i]; j++ ) { 383 lrows[count++] = values[j] - base; 384 } 385 } 386 PETSCFREE(rvalues); PETSCFREE(lens); 387 PETSCFREE(owner); PETSCFREE(nprocs); 388 389 /* actually zap the local rows */ 390 ierr = ISCreateSequential(MPI_COMM_SELF,slen,lrows,&istmp); 391 CHKERRQ(ierr); PETSCFREE(lrows); 392 ierr = 
MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 393 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 394 ierr = ISDestroy(istmp); CHKERRQ(ierr); 395 396 /* wait on sends */ 397 if (nsends) { 398 send_status = (MPI_Status *) PETSCMALLOC( nsends*sizeof(MPI_Status) ); 399 CHKPTRQ(send_status); 400 MPI_Waitall(nsends,send_waits,send_status); 401 PETSCFREE(send_status); 402 } 403 PETSCFREE(send_waits); PETSCFREE(svalues); 404 405 406 return 0; 407 } 408 409 static int MatMult_MPIAIJ(Mat aijin,Vec xx,Vec yy) 410 { 411 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 412 int ierr; 413 if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ: must assemble matrix first"); 414 ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx); 415 CHKERRQ(ierr); 416 ierr = MatMult(aij->A,xx,yy); CHKERRQ(ierr); 417 ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx); 418 CHKERRQ(ierr); 419 ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERRQ(ierr); 420 return 0; 421 } 422 423 static int MatMultAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz) 424 { 425 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 426 int ierr; 427 if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ: must assemble matrix first"); 428 ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx); 429 CHKERRQ(ierr); 430 ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERRQ(ierr); 431 ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx); 432 CHKERRQ(ierr); 433 ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERRQ(ierr); 434 return 0; 435 } 436 437 static int MatMultTrans_MPIAIJ(Mat aijin,Vec xx,Vec yy) 438 { 439 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 440 int ierr; 441 442 if (!aij->assembled) 443 SETERRQ(1,"MatMulTrans_MPIAIJ: must assemble matrix first"); 444 /* do nondiagonal part */ 445 ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr); 446 /* send it on its way */ 447 ierr = VecScatterBegin(aij->lvec,yy,ADDVALUES, 448 (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr); 
449 /* do local part */ 450 ierr = MatMultTrans(aij->A,xx,yy); CHKERRQ(ierr); 451 /* receive remote parts: note this assumes the values are not actually */ 452 /* inserted in yy until the next line, which is true for my implementation*/ 453 /* but is not perhaps always true. */ 454 ierr = VecScatterEnd(aij->lvec,yy,ADDVALUES, 455 (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr); 456 return 0; 457 } 458 459 static int MatMultTransAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz) 460 { 461 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data; 462 int ierr; 463 464 if (!aij->assembled) 465 SETERRQ(1,"MatMulTransAdd_MPIAIJ: must assemble matrix first"); 466 /* do nondiagonal part */ 467 ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr); 468 /* send it on its way */ 469 ierr = VecScatterBegin(aij->lvec,zz,ADDVALUES, 470 (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr); 471 /* do local part */ 472 ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERRQ(ierr); 473 /* receive remote parts: note this assumes the values are not actually */ 474 /* inserted in yy until the next line, which is true for my implementation*/ 475 /* but is not perhaps always true. 
*/ 476 ierr = VecScatterEnd(aij->lvec,zz,ADDVALUES, 477 (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr); 478 return 0; 479 } 480 481 /* 482 This only works correctly for square matrices where the subblock A->A is the 483 diagonal block 484 */ 485 static int MatGetDiagonal_MPIAIJ(Mat Ain,Vec v) 486 { 487 Mat_MPIAIJ *A = (Mat_MPIAIJ *) Ain->data; 488 if (!A->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ: must assemble matrix first"); 489 return MatGetDiagonal(A->A,v); 490 } 491 492 static int MatDestroy_MPIAIJ(PetscObject obj) 493 { 494 Mat mat = (Mat) obj; 495 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 496 int ierr; 497 #if defined(PETSC_LOG) 498 PLogObjectState(obj,"Rows %d Cols %d",aij->M,aij->N); 499 #endif 500 PETSCFREE(aij->rowners); 501 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 502 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 503 if (aij->colmap) PETSCFREE(aij->colmap); 504 if (aij->garray) PETSCFREE(aij->garray); 505 if (aij->lvec) VecDestroy(aij->lvec); 506 if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx); 507 PETSCFREE(aij); 508 PLogObjectDestroy(mat); 509 PETSCHEADERDESTROY(mat); 510 return 0; 511 } 512 #include "draw.h" 513 #include "pviewer.h" 514 515 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 516 { 517 Mat mat = (Mat) obj; 518 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 519 int ierr; 520 PetscObject vobj = (PetscObject) viewer; 521 522 if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ: must assemble matrix first"); 523 if (!viewer) { /* so that viewers may be used from debuggers */ 524 viewer = STDOUT_VIEWER; vobj = (PetscObject) viewer; 525 } 526 if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0; 527 if (vobj->cookie == VIEWER_COOKIE && vobj->type == FILE_VIEWER) { 528 FILE *fd = ViewerFileGetPointer_Private(viewer); 529 MPIU_Seq_begin(mat->comm,1); 530 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 531 aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 532 aij->cend); 533 
ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 534 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 535 fflush(fd); 536 MPIU_Seq_end(mat->comm,1); 537 } 538 else if ((vobj->cookie == VIEWER_COOKIE && vobj->type == FILES_VIEWER) || 539 vobj->cookie == DRAW_COOKIE) { 540 int numtids = aij->numtids, mytid = aij->mytid; 541 if (numtids == 1) { 542 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 543 } 544 else { 545 /* assemble the entire matrix onto first processor. */ 546 Mat A; 547 Mat_AIJ *Aloc; 548 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 549 Scalar *a; 550 551 if (!mytid) { 552 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A); 553 } 554 else { 555 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A); 556 } 557 CHKERRQ(ierr); 558 559 /* copy over the A part */ 560 Aloc = (Mat_AIJ*) aij->A->data; 561 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 562 row = aij->rstart; 563 for ( i=0; i<ai[m]-1; i++ ) {aj[i] += aij->cstart - 1;} 564 for ( i=0; i<m; i++ ) { 565 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERTVALUES); 566 CHKERRQ(ierr); 567 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 568 } 569 aj = Aloc->j; 570 for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= aij->cstart - 1;} 571 572 /* copy over the B part */ 573 Aloc = (Mat_AIJ*) aij->B->data; 574 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 575 row = aij->rstart; 576 ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 577 for ( i=0; i<ai[m]-1; i++ ) {cols[i] = aij->garray[aj[i]-1];} 578 for ( i=0; i<m; i++ ) { 579 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERTVALUES); 580 CHKERRQ(ierr); 581 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 582 } 583 PETSCFREE(ct); 584 ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 585 ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 586 if (!mytid) { 587 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 588 } 589 ierr = MatDestroy(A); CHKERRQ(ierr); 590 } 591 } 592 return 0; 593 } 594 595 
extern int MatMarkDiag_AIJ(Mat_AIJ *);
/*
   This has to provide several versions.

    1) per sequential
    2) a) use only local smoothing updating outer values only once.
       b) local smoothing updating outer values each inner iteration
    3) color updating out values betwen colors.
*/
/*
   MatRelax_MPIAIJ - SOR/SSOR relaxation sweeps for the parallel AIJ
   matrix, plus the Eisenstat-trick application used by SSOR
   preconditioning.

   Implementation notes (all variants):
   - The local AIJ blocks store 1-based column indices, so the dense
     vectors are accessed through pointers shifted by one (xs = x-1,
     ls = lvec-1, ts = t-1) and row pointers carry "- 1" offsets.
   - diag[i] is the (1-based) position of row i's diagonal in A->a, so the
     diagonal value is A->a[diag[i]-1].
   - The off-diagonal block B always contributes through the ghost values
     in mat->lvec, filled by VecScatter/VecPipeline operations.
   - The non-LOCAL variants use VecPipelineBegin/End so that updated values
     flow between processors in sweep order; the LOCAL variants refresh the
     ghost values once and then smooth purely locally.
   - Note the local variable n is reused inside the sweeps as the
     per-row nonzero count, clobbering its initial value mat->n.
*/
static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                           double shift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_AIJ    *A = (Mat_AIJ *) AA->data, *B = (Mat_AIJ *)BB->data;
  Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i;
  Vec        tt;

  if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ: must assemble matrix first");

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x -1; /* shift by one for index start of 1 */
  ls--;
  /* lazily mark the diagonal positions in the local diagonal block */
  if (!A->diag) {if ((ierr = MatMarkDiag_AIJ(A))) return ierr;}
  diag = A->diag;
  if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
    SETERRQ(1,"That option not yet support for parallel AIJ matrices");
  }
  if (flag & SOR_EISENSTAT) {
    /* Let A = L + U + D; where L is lower trianglar,
    U is upper triangular, E is diagonal; This routine applies

            (L + E)^{-1} A (U + E)^{-1}

    to a vector efficiently using Eisenstat's trick. This is for
    the case of SSOR preconditioner, so E is D/omega where omega
    is the relaxation factor.
    */
    ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
    VecGetArray(tt,&t);
    scale = (2.0/omega) - 1.0;
    /*  x = (E + U)^{-1} b  (backward solve over the strict upper part) */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                            mat->Mvctx); CHKERRQ(ierr);
    for ( i=m-1; i>-1; i-- ) {
      n    = A->i[i+1] - diag[i] - 1;   /* entries strictly above diagonal */
      idx  = A->j + diag[i];
      v    = A->a + diag[i];
      sum  = b[i];
      SPARSEDENSEMDOT(sum,xs,v,idx,n);
      d    = shift + A->a[diag[i]-1];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] - 1;
      v    = B->a + B->i[i] - 1;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      x[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                          mat->Mvctx); CHKERRQ(ierr);

    /*  t = b - (2*E - D)x */
    v = A->a;
    for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; }

    /*  t = (E + L)^{-1}t  (forward solve over the strict lower part) */
    ts = t - 1; /* shifted by one for index start of a or mat->j*/
    diag = A->diag;   /* diag pointer was advanced by the loop above */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    for ( i=0; i<m; i++ ) {
      n    = diag[i] - A->i[i];         /* entries strictly below diagonal */
      idx  = A->j + A->i[i] - 1;
      v    = A->a + A->i[i] - 1;
      sum  = t[i];
      SPARSEDENSEMDOT(sum,ts,v,idx,n);
      d    = shift + A->a[diag[i]-1];
      n    = B->i[i+1] - B->i[i];
      idx  = B->j + B->i[i] - 1;
      v    = B->a + B->i[i] - 1;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      t[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                          mat->Mvctx); CHKERRQ(ierr);
    /*  x = x + t */
    for ( i=0; i<m; i++ ) { x[i] += t[i]; }
    VecDestroy(tt);
    return 0;
  }


  if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec); VecSet(&zero,xx);
    }
    else {
      /* refresh ghost values from the current iterate */
      ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
      CHKERRQ(ierr);
    }
    while (its--) {
      /* go down through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      /* come up through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep with x == 0 only needs the strict lower triangle */
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = diag[i] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep with x == 0 only needs the strict upper triangle */
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - diag[i] - 1;
        idx  = A->j + diag[i];
        v    = A->a + diag[i];
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERDOWN,
                             mat->Mvctx); CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERDOWN,
                           mat->Mvctx); CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* zero ghost contribution: purely local relaxation suffices */
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    /* ghost values are fetched once and then held fixed over all its */
    ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] - 1;
        v    = A->a + A->i[i] - 1;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = shift + A->a[diag[i]-1];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] - 1;
        v    = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  return 0;
}

/*
   MatGetInfo_MPIAIJ - Returns nonzero counts / allocation / memory for the
   local blocks combined, either per-processor (MAT_LOCAL) or reduced over
   the communicator (MAT_GLOBAL_MAX / MAT_GLOBAL_SUM).
*/
static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
                             int *nzalloc,int *mem)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;

  ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
  ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
  isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
  if (flag == MAT_LOCAL) {
    *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_MAX,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_SUM,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  }
  return 0;
}

extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);

/*
   MatSetOption_MPIAIJ - Forwards nonzero-location options to both local
   blocks; column-oriented insertion is not supported for this format.
*/
static int MatSetOption_MPIAIJ(Mat aijin,MatOption op)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;

  if (op == NO_NEW_NONZERO_LOCATIONS) {
    MatSetOption(aij->A,op);
    MatSetOption(aij->B,op);
  }
  else if (op == YES_NEW_NONZERO_LOCATIONS) {
    MatSetOption(aij->A,op);
    MatSetOption(aij->B,op);
  }
  else if (op == COLUMN_ORIENTED) SETERRQ(1,"Column oriented not supported");
  return 0;
}

/* MatGetSize_MPIAIJ - Returns the global dimensions M x N. */
static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat =
(Mat_MPIAIJ *) matin->data; 971 *m = mat->M; *n = mat->N; 972 return 0; 973 } 974 975 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 976 { 977 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 978 *m = mat->m; *n = mat->N; 979 return 0; 980 } 981 982 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 983 { 984 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 985 *m = mat->rstart; *n = mat->rend; 986 return 0; 987 } 988 989 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 990 { 991 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 992 Scalar *vworkA, *vworkB, **pvA, **pvB; 993 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 994 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 995 996 if (row < rstart || row >= rend) 997 SETERRQ(1,"MatGetRow_MPIAIJ: Currently you can get only local rows.") 998 lrow = row - rstart; 999 1000 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1001 if (!v) {pvA = 0; pvB = 0;} 1002 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1003 ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1004 ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1005 nztot = nzA + nzB; 1006 1007 if (v || idx) { 1008 if (nztot) { 1009 /* Sort by increasing column numbers, assuming A and B already sorted */ 1010 int imark; 1011 if (mat->assembled) { 1012 for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]]; 1013 } 1014 if (v) { 1015 *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v); 1016 for ( i=0; i<nzB; i++ ) { 1017 if (cworkB[i] < cstart) (*v)[i] = vworkB[i]; 1018 else break; 1019 } 1020 imark = i; 1021 for ( i=0; i<nzA; i++ ) (*v)[imark+i] = vworkA[i]; 1022 for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i]; 1023 } 1024 if (idx) { 1025 *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx); 1026 for (i=0; i<nzA; i++) cworkA[i] += cstart; 1027 for ( i=0; i<nzB; i++ ) { 1028 if (cworkB[i] < cstart) (*idx)[i] = cworkB[i]; 1029 else break; 
1030 } 1031 imark = i; 1032 for ( i=0; i<nzA; i++ ) (*idx)[imark+i] = cworkA[i]; 1033 for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i]; 1034 } 1035 } 1036 else {*idx = 0; *v=0;} 1037 } 1038 *nz = nztot; 1039 ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1040 ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1041 return 0; 1042 } 1043 1044 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1045 { 1046 if (idx) PETSCFREE(*idx); 1047 if (v) PETSCFREE(*v); 1048 return 0; 1049 } 1050 1051 static int MatNorm_MPIAIJ(Mat mat,MatNormType type,double *norm) 1052 { 1053 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1054 int ierr; 1055 if (aij->numtids == 1) { 1056 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1057 } else 1058 SETERRQ(1,"MatNorm_MPIAIJ: not yet supported in parallel."); 1059 return 0; 1060 } 1061 1062 static int MatTranspose_MPIAIJ(Mat A,Mat *Bin) 1063 { 1064 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1065 int ierr; 1066 Mat B; 1067 Mat_AIJ *Aloc; 1068 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1069 Scalar *array; 1070 1071 ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B); 1072 CHKERRQ(ierr); 1073 1074 /* copy over the A part */ 1075 Aloc = (Mat_AIJ*) a->A->data; 1076 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1077 row = a->rstart; 1078 for ( i=0; i<ai[m]-1; i++ ) {aj[i] += a->cstart - 1;} 1079 for ( i=0; i<m; i++ ) { 1080 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERTVALUES); 1081 CHKERRQ(ierr); 1082 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1083 } 1084 aj = Aloc->j; 1085 for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= a->cstart - 1;} 1086 1087 /* copy over the B part */ 1088 Aloc = (Mat_AIJ*) a->B->data; 1089 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1090 row = a->rstart; 1091 ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 1092 for ( i=0; i<ai[m]-1; i++ ) {cols[i] = a->garray[aj[i]-1];} 1093 for ( 
i=0; i<m; i++ ) { 1094 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERTVALUES); 1095 CHKERRQ(ierr); 1096 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1097 } 1098 PETSCFREE(ct); 1099 ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1100 ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1101 *Bin = B; 1102 return 0; 1103 } 1104 1105 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 1106 static int MatCopyPrivate_MPIAIJ(Mat,Mat *); 1107 1108 /* -------------------------------------------------------------------*/ 1109 static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 1110 MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 1111 MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 1112 MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1113 MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1114 MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1115 MatLUFactor_MPIAIJ,0, 1116 MatRelax_MPIAIJ, 1117 MatTranspose_MPIAIJ, 1118 MatGetInfo_MPIAIJ,0, 1119 MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1120 MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 1121 0, 1122 MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1123 MatGetReordering_MPIAIJ, 1124 MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1125 MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1126 MatILUFactorSymbolic_MPIAIJ,0, 1127 0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ}; 1128 1129 /*@ 1130 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1131 (the default parallel PETSc format). 1132 1133 Input Parameters: 1134 . comm - MPI communicator 1135 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1136 . n - number of local columns (or PETSC_DECIDE to have calculated 1137 if N is given) 1138 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1139 . N - number of global columns (or PETSC_DECIDE to have calculated 1140 if n is given) 1141 . 
d_nz - number of nonzeros per row in diagonal portion of local submatrix
       (same for all local rows)
.  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
       or null (possibly different for each row). You must leave room
       for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in off-diagonal portion of local
       submatrix (same for all local rows).
.  o_nnz - number of nonzeros per row in off-diagonal portion of local
       submatrix or null (possibly different for each row).

  Output Parameter:
.  newmat - the matrix

  Notes:
  The AIJ format (also called the Yale sparse matrix format or
  compressed row storage), is fully compatible with standard Fortran 77
  storage.  That is, the stored row and column indices begin at
  one, not zero.  See the users manual for further details.

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

  Storage Information:
  For a square global matrix we define each processor's diagonal portion
  to be its local rows and the corresponding columns (a square submatrix);
  each processor's off-diagonal portion encompasses the remainder of the
  local matrix (a rectangular submatrix).

  The user can specify preallocated storage for the diagonal part of
  the local submatrix with either d_nz or d_nnz (not both).  Set both
  d_nz and d_nnz to zero for PETSc to control dynamic memory allocation.
  Likewise, specify preallocated storage for the off-diagonal part of
  the local submatrix with o_nz or o_nnz (not both).
1174 1175 .keywords: matrix, aij, compressed row, sparse, parallel 1176 1177 .seealso: MatCreate(), MatCreateSequentialAIJ(), MatSetValues() 1178 @*/ 1179 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 1180 int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat) 1181 { 1182 Mat mat; 1183 Mat_MPIAIJ *aij; 1184 int ierr, i,sum[2],work[2]; 1185 *newmat = 0; 1186 PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1187 PLogObjectCreate(mat); 1188 mat->data = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij); 1189 mat->ops = &MatOps; 1190 mat->destroy = MatDestroy_MPIAIJ; 1191 mat->view = MatView_MPIAIJ; 1192 mat->factor = 0; 1193 1194 aij->insertmode = NOTSETVALUES; 1195 MPI_Comm_rank(comm,&aij->mytid); 1196 MPI_Comm_size(comm,&aij->numtids); 1197 1198 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1199 work[0] = m; work[1] = n; 1200 MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm ); 1201 if (M == PETSC_DECIDE) M = sum[0]; 1202 if (N == PETSC_DECIDE) N = sum[1]; 1203 } 1204 if (m == PETSC_DECIDE) 1205 {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);} 1206 if (n == PETSC_DECIDE) 1207 {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);} 1208 aij->m = m; 1209 aij->n = n; 1210 aij->N = N; 1211 aij->M = M; 1212 1213 /* build local table of row and column ownerships */ 1214 aij->rowners = (int *) PETSCMALLOC(2*(aij->numtids+2)*sizeof(int)); 1215 CHKPTRQ(aij->rowners); 1216 aij->cowners = aij->rowners + aij->numtids + 1; 1217 MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm); 1218 aij->rowners[0] = 0; 1219 for ( i=2; i<=aij->numtids; i++ ) { 1220 aij->rowners[i] += aij->rowners[i-1]; 1221 } 1222 aij->rstart = aij->rowners[aij->mytid]; 1223 aij->rend = aij->rowners[aij->mytid+1]; 1224 MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm); 1225 aij->cowners[0] = 0; 1226 for ( i=2; i<=aij->numtids; i++ ) { 1227 aij->cowners[i] += aij->cowners[i-1]; 1228 } 1229 aij->cstart = aij->cowners[aij->mytid]; 1230 aij->cend = 
aij->cowners[aij->mytid+1]; 1231 1232 1233 ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&aij->A); 1234 CHKERRQ(ierr); 1235 PLogObjectParent(mat,aij->A); 1236 ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&aij->B); 1237 CHKERRQ(ierr); 1238 PLogObjectParent(mat,aij->B); 1239 1240 /* build cache for off array entries formed */ 1241 ierr = StashBuild_Private(&aij->stash); CHKERRQ(ierr); 1242 aij->colmap = 0; 1243 aij->garray = 0; 1244 1245 /* stuff used for matrix vector multiply */ 1246 aij->lvec = 0; 1247 aij->Mvctx = 0; 1248 aij->assembled = 0; 1249 1250 *newmat = mat; 1251 return 0; 1252 } 1253 1254 static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat) 1255 { 1256 Mat mat; 1257 Mat_MPIAIJ *aij,*oldmat = (Mat_MPIAIJ *) matin->data; 1258 int ierr, len; 1259 *newmat = 0; 1260 1261 if (!oldmat->assembled) SETERRQ(1,"Cannot copy unassembled matrix"); 1262 PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1263 PLogObjectCreate(mat); 1264 mat->data = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij); 1265 mat->ops = &MatOps; 1266 mat->destroy = MatDestroy_MPIAIJ; 1267 mat->view = MatView_MPIAIJ; 1268 mat->factor = matin->factor; 1269 1270 aij->m = oldmat->m; 1271 aij->n = oldmat->n; 1272 aij->M = oldmat->M; 1273 aij->N = oldmat->N; 1274 1275 aij->assembled = 1; 1276 aij->rstart = oldmat->rstart; 1277 aij->rend = oldmat->rend; 1278 aij->cstart = oldmat->cstart; 1279 aij->cend = oldmat->cend; 1280 aij->numtids = oldmat->numtids; 1281 aij->mytid = oldmat->mytid; 1282 aij->insertmode = NOTSETVALUES; 1283 1284 aij->rowners = (int *) PETSCMALLOC( (aij->numtids+1)*sizeof(int) ); 1285 CHKPTRQ(aij->rowners); 1286 PETSCMEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int)); 1287 ierr = StashInitialize_Private(&aij->stash); CHKERRQ(ierr); 1288 if (oldmat->colmap) { 1289 aij->colmap = (int *) PETSCMALLOC( (aij->N)*sizeof(int) ); 1290 CHKPTRQ(aij->colmap); 1291 PETSCMEMCPY(aij->colmap,oldmat->colmap,(aij->N)*sizeof(int)); 1292 } 
else aij->colmap = 0; 1293 if (oldmat->garray && (len = ((Mat_AIJ *) (oldmat->B->data))->n)) { 1294 aij->garray = (int *) PETSCMALLOC(len*sizeof(int) ); CHKPTRQ(aij->garray); 1295 PETSCMEMCPY(aij->garray,oldmat->garray,len*sizeof(int)); 1296 } else aij->garray = 0; 1297 1298 ierr = VecDuplicate(oldmat->lvec,&aij->lvec); CHKERRQ(ierr); 1299 PLogObjectParent(mat,aij->lvec); 1300 ierr = VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERRQ(ierr); 1301 PLogObjectParent(mat,aij->Mvctx); 1302 ierr = MatConvert(oldmat->A,MATSAME,&aij->A); CHKERRQ(ierr); 1303 PLogObjectParent(mat,aij->A); 1304 ierr = MatConvert(oldmat->B,MATSAME,&aij->B); CHKERRQ(ierr); 1305 PLogObjectParent(mat,aij->B); 1306 *newmat = mat; 1307 return 0; 1308 } 1309