#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.55 1995/07/06 17:47:13 curfman Exp curfman $";
#endif

#include "mpiaij.h"
#include "vec/vecimpl.h"
#include "inline/spops.h"

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way, since the
   length of colmap equals the global matrix length.
*/
static int CreateColmap_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_AIJ *B = (Mat_AIJ*) aij->B->data;
  int n = B->n,i;
  aij->colmap = (int *) PETSCMALLOC(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
  PETSCMEMSET(aij->colmap,0,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  return 0;
}

extern int DisAssemble_MPIAIJ(Mat);

static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
                               int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int cstart = aij->cstart, cend = aij->cend,row,col;

  if (aij->insertmode != NOTSETVALUES && aij->insertmode != addv) {
    SETERRQ(1,"You cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"Negative row index");
    if (idxm[i] >= aij->M) SETERRQ(1,"Row index too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"Negative column index");
        if (idxn[j] >= aij->N) SETERRQ(1,"Column index too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
        else {
          if (aij->assembled) {
            if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
            col = aij->colmap[idxn[j]] - 1;
            if (col < 0 && !((Mat_AIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = idxn[j];
            }
          }
          else col = idxn[j];
          ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);
      CHKERRQ(ierr);
    }
  }
  return 0;
}

/*
   The assembly code is a lot like the code for vectors; we should
   someday derive a single assembly routine that can be used for
   either case.
*/

static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm comm = mat->comm;
  int numtids = aij->numtids, *owners = aij->rowners;
  int mytid = aij->mytid;
  MPI_Request *send_waits,*recv_waits;
  int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int tag = mat->tag, *owner,*starts,count,ierr;
  InsertMode addv;
  Scalar *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce((void *) &aij->insertmode,(void *) &addv,1,MPI_INT,
                MPI_BOR,comm);
  if (addv == (ADDVALUES|INSERTVALUES)) {
    SETERRQ(1,"Some processors have inserted while others have added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
  PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
  owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<numtids; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
  nreceives = work[mytid];
  MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
  nmax = work[mytid];
  PETSCFREE(work);

  /* post receives:
       1) each message will consist of (global row index, global column
          index, value) triples; the indices are stored as doubles to
          simplify the message passing.
       2) since we don't know how long each individual message is we
          allocate the largest needed buffer for each receive. Potentially
          this is a lot of wasted space.

       This could be done better.
  */
  rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv((void *)(rvalues+3*nmax*i),3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PETSCMALLOC( 3*(aij->stash.n+1)*sizeof(Scalar) );
  CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PETSCFREE(owner);
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<numtids; i++ ) {
    if (procs[i]) {
      MPI_Isend((void*)(svalues+3*starts[i]),3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PETSCFREE(starts); PETSCFREE(nprocs);

  /* Free cache space */
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues = svalues; aij->rvalues = rvalues;
  aij->nsends = nsends; aij->nrecvs = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax = nmax;

  return 0;
}
extern int MatSetUpMultiply_MPIAIJ(Mat);

static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  int ierr;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status *send_status,recv_status;
  int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n;
  int row,col,other_disassembled;
  Scalar *values,val;
  InsertMode addv = aij->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PETSCREAL(values[3*i]) - aij->rstart;
      col = (int) PETSCREAL(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (aij->assembled) {
          if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
          col = aij->colmap[col] - 1;
          if (col < 0 && !((Mat_AIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PETSCREAL(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PETSCMALLOC( aij->nsends*sizeof(MPI_Status) );
    CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PETSCFREE(send_status);
  }
  PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues);

  aij->insertmode = NOTSETVALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce((void *) &aij->assembled,(void *) &other_disassembled,1,
                MPI_INT,MPI_PROD,mat->comm);
  if (aij->assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!aij->assembled && mode == FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
    aij->assembled = 1;
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  return 0;
}

static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* again this uses the same basic strategy as in the assembly and
   scatter create routines; we should try to do it systematically
   if we can figure out the proper level of generality. */

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids;
  int *procs,*nprocs,j,found,idx,nsends,*work;
  int nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid;
  int *rvalues,tag = A->tag,count,base,slen,n,*source;
  int *lens,imdex,*lrows,*values;
  MPI_Comm comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status recv_status,*send_status;
  IS istmp;

  if (!l->assembled)
    SETERRQ(1,"MatZeroRows_MPIAIJ: Must assemble matrix first");
  ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
  PETSCMEMSET(nprocs,0,2*numtids*sizeof(int)); procs = nprocs + numtids;
  owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<numtids; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"Index out of range.");
  }
  nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce((void *) procs,(void *) work,numtids,MPI_INT,MPI_SUM,comm);
  nrecvs = work[mytid];
  MPI_Allreduce((void *) nprocs,(void *) work,numtids,MPI_INT,MPI_MAX,comm);
  nmax = work[mytid];
  PETSCFREE(work);

  /* post receives: */
  rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv((void *)(rvalues+nmax*i),nmax,MPI_INT,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PETSCMALLOC( (numtids+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<numtids; i++ ) {
    if (procs[i]) {
      MPI_Isend((void*)(svalues+starts[i]),nprocs[i],MPI_INT,i,tag,
                comm,send_waits+count++);
    }
  }
  PETSCFREE(starts);

  base = owners[mytid];

  /* wait on receives */
  lens = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex] = n;
    slen += n;
    count--;
  }
  PETSCFREE(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PETSCFREE(rvalues); PETSCFREE(lens);
  PETSCFREE(owner); PETSCFREE(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSequential(MPI_COMM_SELF,slen,lrows,&istmp);
  CHKERRQ(ierr); PETSCFREE(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PETSCMALLOC( nsends*sizeof(MPI_Status) );
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PETSCFREE(send_status);
  }
  PETSCFREE(send_waits); PETSCFREE(svalues);

  return 0;
}

static int MatMult_MPIAIJ(Mat aijin,Vec xx,Vec yy)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
  int ierr;
  if (!aij->assembled) SETERRQ(1,"MatMult_MPIAIJ: must assemble matrix first");
  ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
  CHKERRQ(ierr);
  ierr = MatMult(aij->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
  CHKERRQ(ierr);
  ierr = MatMultAdd(aij->B,aij->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}

static int MatMultAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
  int ierr;
  if (!aij->assembled)
    SETERRQ(1,"MatMultAdd_MPIAIJ: must assemble matrix first");
  ierr = VecScatterBegin(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
  CHKERRQ(ierr);
  ierr = MatMultAdd(aij->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,aij->lvec,INSERTVALUES,SCATTERALL,aij->Mvctx);
  CHKERRQ(ierr);
  ierr = MatMultAdd(aij->B,aij->lvec,zz,zz); CHKERRQ(ierr);
  return 0;
}

static int MatMultTrans_MPIAIJ(Mat aijin,Vec xx,Vec yy)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
  int ierr;

  if (!aij->assembled)
    SETERRQ(1,"MatMultTrans_MPIAIJ: must assemble matrix first");
  /* do nondiagonal part */
  ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(aij->lvec,yy,ADDVALUES,
               (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = MatMultTrans(aij->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(aij->lvec,yy,ADDVALUES,
               (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatMultTransAdd_MPIAIJ(Mat aijin,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;
  int ierr;

  if (!aij->assembled)
    SETERRQ(1,"MatMultTransAdd_MPIAIJ: must assemble matrix first");
  /* do nondiagonal part */
  ierr = MatMultTrans(aij->B,xx,aij->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(aij->lvec,zz,ADDVALUES,
               (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = MatMultTransAdd(aij->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(aij->lvec,zz,ADDVALUES,
               (ScatterMode)(SCATTERALL|SCATTERREVERSE),aij->Mvctx); CHKERRQ(ierr);
  return 0;
}

/*
   This only works correctly for square matrices where the subblock A->A is
   the diagonal block.
*/
static int MatGetDiagonal_MPIAIJ(Mat Ain,Vec v)
{
  Mat_MPIAIJ *A = (Mat_MPIAIJ *) Ain->data;
  if (!A->assembled)
    SETERRQ(1,"MatGetDiagonal_MPIAIJ: must assemble matrix first");
  return MatGetDiagonal(A->A,v);
}

static int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int ierr;
#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows %d Cols %d",aij->M,aij->N);
#endif
  PETSCFREE(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PETSCFREE(aij->colmap);
  if (aij->garray) PETSCFREE(aij->garray);
  if (aij->lvec) VecDestroy(aij->lvec);
  if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx);
  PETSCFREE(aij);
  PLogObjectDestroy(mat);
  PETSCHEADERDESTROY(mat);
  return 0;
}
#include "draw.h"
#include "pviewer.h"

static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int ierr;
  PetscObject vobj = (PetscObject) viewer;

  if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ: must assemble matrix first");
  if (!viewer) { /* so that viewers may be used from debuggers */
    viewer = STDOUT_VIEWER; vobj = (PetscObject) viewer;
  }
  if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
  if (vobj->cookie == VIEWER_COOKIE && vobj->type == FILE_VIEWER) {
    FILE *fd = ViewerFileGetPointer_Private(viewer);
    MPIU_Seq_begin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
            aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    MPIU_Seq_end(mat->comm,1);
  }
  else if ((vobj->cookie == VIEWER_COOKIE && vobj->type == FILES_VIEWER) ||
           vobj->cookie == DRAW_COOKIE) {
    int numtids = aij->numtids, mytid = aij->mytid;
    if (numtids == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat A;
      Mat_AIJ *Aloc;
      int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar *a;

      if (!mytid) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);
      }
      CHKERRQ(ierr);

      /* copy over the A part */
      Aloc = (Mat_AIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]-1; i++ ) {aj[i] += aij->cstart - 1;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERTVALUES);
        CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= aij->cstart - 1;}

      /* copy over the B part */
      Aloc = (Mat_AIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]-1; i++ ) {cols[i] = aij->garray[aj[i]-1];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERTVALUES);
        CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PETSCFREE(ct);
      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!mytid) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

extern int MatMarkDiag_AIJ(Mat_AIJ *);
/*
   This has to provide several versions:

     1) as in the sequential case
     2) a) use only local smoothing, updating outer values only once
        b) local smoothing, updating outer values each inner iteration
     3) coloring, updating outer values between colors.
*/
static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                           double shift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat AA = mat->A, BB = mat->B;
  Mat_AIJ *A = (Mat_AIJ *) AA->data, *B = (Mat_AIJ *)BB->data;
  Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
  int ierr,*idx, *diag;
  int n = mat->n, m = mat->m, i;
  Vec tt;

  if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ: must assemble matrix first");

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x -1; /* shift by one for index start of 1 */
  ls--;
  if (!A->diag) {if ((ierr = MatMarkDiag_AIJ(A))) return ierr;}
  diag = A->diag;
  if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
    SETERRQ(1,"That option is not yet supported for parallel AIJ matrices");
  }
  if (flag & SOR_EISENSTAT) {
    /* Let A = L + U + D, where L is lower triangular, U is upper
       triangular, and D is diagonal.  This routine applies

              (L + E)^{-1} A (U + E)^{-1}

       to a vector efficiently using Eisenstat's trick.  This is for the
       case of the SSOR preconditioner, so E is D/omega where omega
       is the relaxation factor.
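
       In more detail: writing A = (L+E) + (D-2E) + (U+E), the product
       y = (L+E)^{-1} A (U+E)^{-1} b can be formed as

              x = (U+E)^{-1} b,   t = b - (2E-D) x,   y = x + (L+E)^{-1} t,

       which is what the three loops below compute.  Since E = D/omega,
       2E - D = ((2/omega)-1) D; that factor is the 'scale' used below.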
    */
    ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
    VecGetArray(tt,&t);
    scale = (2.0/omega) - 1.0;
    /* x = (E + U)^{-1} b */
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                            mat->Mvctx); CHKERRQ(ierr);
    for ( i=m-1; i>-1; i-- ) {
      n = A->i[i+1] - diag[i] - 1;
      idx = A->j + diag[i];
      v = A->a + diag[i];
      sum = b[i];
      SPARSEDENSEMDOT(sum,xs,v,idx,n);
      d = shift + A->a[diag[i]-1];
      n = B->i[i+1] - B->i[i];
      idx = B->j + B->i[i] - 1;
      v = B->a + B->i[i] - 1;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      x[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                          mat->Mvctx); CHKERRQ(ierr);

    /* t = b - (2*E - D)x */
    v = A->a;
    for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ - 1])*x[i]; }

    /* t = (E + L)^{-1}t */
    ts = t - 1; /* shifted by one for index start of a or mat->j*/
    diag = A->diag;
    VecSet(&zero,mat->lvec);
    ierr = VecPipelineBegin(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    for ( i=0; i<m; i++ ) {
      n = diag[i] - A->i[i];
      idx = A->j + A->i[i] - 1;
      v = A->a + A->i[i] - 1;
      sum = t[i];
      SPARSEDENSEMDOT(sum,ts,v,idx,n);
      d = shift + A->a[diag[i]-1];
      n = B->i[i+1] - B->i[i];
      idx = B->j + B->i[i] - 1;
      v = B->a + B->i[i] - 1;
      SPARSEDENSEMDOT(sum,ls,v,idx,n);
      t[i] = omega*(sum/d);
    }
    ierr = VecPipelineEnd(tt,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                          mat->Mvctx); CHKERRQ(ierr);
    /* x = x + t */
    for ( i=0; i<m; i++ ) { x[i] += t[i]; }
    VecDestroy(tt);
    return 0;
  }

  if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec); VecSet(&zero,xx);
    }
    else {
      ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
      CHKERRQ(ierr);
    }
    while (its--) {
      /* go down through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d = shift + A->a[diag[i]-1];
        n = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      /* come up through the rows */
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d = shift + A->a[diag[i]-1];
        n = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n = diag[i] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d = shift + A->a[diag[i]-1];
        n = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERUP,mat->Mvctx);
      CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=0; i<m; i++ ) {
        n = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d = shift + A->a[diag[i]-1];
        n = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEDOWN,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if (flag & SOR_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      VecSet(&zero,mat->lvec);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n = A->i[i+1] - diag[i] - 1;
        idx = A->j + diag[i];
        v = A->a + diag[i];
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d = shift + A->a[diag[i]-1];
        n = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = omega*(sum/d);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                            mat->Mvctx); CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERDOWN,
                             mat->Mvctx); CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERDOWN,
                           mat->Mvctx); CHKERRQ(ierr);
      ierr = VecPipelineBegin(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                              mat->Mvctx); CHKERRQ(ierr);
      for ( i=m-1; i>-1; i-- ) {
        n = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d = shift + A->a[diag[i]-1];
        n = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      ierr = VecPipelineEnd(xx,mat->lvec,INSERTVALUES,PIPELINEUP,
                            mat->Mvctx); CHKERRQ(ierr);
    }
  }
  else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d = shift + A->a[diag[i]-1];
        n = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d = shift + A->a[diag[i]-1];
        n = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d = shift + A->a[diag[i]-1];
        n = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return MatRelax(mat->A,bb,omega,flag,shift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERTVALUES,SCATTERALL,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERTVALUES,SCATTERALL,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] - 1;
        v = A->a + A->i[i] - 1;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d = shift + A->a[diag[i]-1];
        n = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] - 1;
        v = B->a + B->i[i] - 1;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
      }
    }
  }
  return 0;
}

static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
                             int *nzalloc,int *mem)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat A = mat->A, B = mat->B;
  int ierr, isend[3], irecv[3], nzA, nzallocA, memA;

  ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
  ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
  isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
  if (flag == MAT_LOCAL) {
    *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_MAX,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce((void *) isend,(void *) irecv,3,MPI_INT,MPI_SUM,matin->comm);
    *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
  }
  return 0;
}

static int MatSetOption_MPIAIJ(Mat aijin,MatOption op)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) aijin->data;

  if (op == NO_NEW_NONZERO_LOCATIONS) {
    MatSetOption(aij->A,op);
    MatSetOption(aij->B,op);
  }
  else if (op == YES_NEW_NONZERO_LOCATIONS) {
    MatSetOption(aij->A,op);
    MatSetOption(aij->B,op);
  }
  else if (op == COLUMN_ORIENTED) SETERRQ(1,"Column oriented not supported");
  return 0;
}

static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->m; *n = mat->n;
  return 0;
}

static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar *vworkA, *vworkB, **pvA, **pvB;
  int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;

  if (row < rstart || row >= rend)
    SETERRQ(1,"MatGetRow_MPIAIJ: Currently you can get only local rows.");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v) {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark, imark2;
      if (mat->assembled) {
        for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
      }
      if (v) {
        *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
        for ( i=0; i<nzB; i++ ) {
          if (cworkB[i] < cstart) (*v)[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ ) (*v)[imark+i] = vworkA[i];
        imark2 = imark+nzA;
        for ( i=imark; i<nzB; i++ ) (*v)[imark2+(i-imark)] = vworkB[i];
      }
      if (idx) {
        *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
        for (i=0; i<nzA; i++) cworkA[i] += cstart;
        for ( i=0; i<nzB; i++ ) {
          if (cworkB[i] < cstart) (*idx)[i] = cworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ ) (*idx)[imark+i] = cworkA[i];
        imark2 = imark+nzA;
        for ( i=imark; i<nzB; i++ ) (*idx)[imark2+(i-imark)] = cworkB[i];
      }
    }
    else {*idx = 0; *v=0;}
  }
  *nz = nztot;
  ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}

static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  if (idx) PETSCFREE(*idx);
  if (v) PETSCFREE(*v);
  return 0;
}

static int MatTranspose_MPIAIJ(Mat A,Mat *Bin)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int ierr;
  Mat B;
  Mat_AIJ *Aloc;
  int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar *array;

  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B);
  CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_AIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]-1; i++ ) {aj[i] += a->cstart - 1;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERTVALUES);
    CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]-1; i++ ) {aj[i] -= a->cstart - 1;}

  /* copy over the B part */
  Aloc = (Mat_AIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]-1; i++ ) {cols[i] = a->garray[aj[i]-1];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERTVALUES);
    CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PETSCFREE(ct);
  ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
  *Bin = B;
  return 0;
}

extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
static int MatCopyPrivate_MPIAIJ(Mat,Mat *);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,0,0,
       0,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,0,0,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,0,
       0,0,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       0,0,
       0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ};

/*@
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated
       if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated
       if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
          (same for all local rows)
.  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
           or null (possibly different for each row).  You must leave room
           for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in off-diagonal portion of local
          submatrix (same for all local rows).
.  o_nnz - number of nonzeros per row in off-diagonal portion of local
           submatrix or null (possibly different for each row).

   Output Parameter:
.  newmat - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices begin at
   one, not zero.  See the users manual for further details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set both
   d_nz and d_nnz to zero for PETSc to control dynamic memory allocation.
   Likewise, specify preallocated storage for the off-diagonal part of
   the local submatrix with o_nz or o_nnz (not both).

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSequentialAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
{
  Mat mat;
  Mat_MPIAIJ *aij;
  int ierr, i,sum[2],work[2];
  *newmat = 0;
  PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(mat);
  mat->data = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij);
  mat->ops = &MatOps;
  mat->destroy = MatDestroy_MPIAIJ;
  mat->view = MatView_MPIAIJ;
  mat->factor = 0;

  aij->insertmode = NOTSETVALUES;
  MPI_Comm_rank(comm,&aij->mytid);
  MPI_Comm_size(comm,&aij->numtids);

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce((void *) work,(void *) sum,2,MPI_INT,MPI_SUM,comm);
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE)
    {m = M/aij->numtids + ((M % aij->numtids) > aij->mytid);}
  if (n == PETSC_DECIDE)
    {n = N/aij->numtids + ((N % aij->numtids) > aij->mytid);}
  aij->m = m;
  aij->n = n;
  aij->N = N;
  aij->M = M;

  /* build local table of row and column ownerships */
  aij->rowners = (int *) PETSCMALLOC(2*(aij->numtids+2)*sizeof(int));
  CHKPTRQ(aij->rowners);
  aij->cowners = aij->rowners + aij->numtids + 1;
  MPI_Allgather(&m,1,MPI_INT,aij->rowners+1,1,MPI_INT,comm);
  aij->rowners[0] = 0;
  for ( i=2; i<=aij->numtids; i++ ) {
    aij->rowners[i] += aij->rowners[i-1];
  }
  aij->rstart = aij->rowners[aij->mytid];
  aij->rend   = aij->rowners[aij->mytid+1];
  MPI_Allgather(&n,1,MPI_INT,aij->cowners+1,1,MPI_INT,comm);
  aij->cowners[0] = 0;
  for ( i=2; i<=aij->numtids; i++ ) {
    aij->cowners[i] += aij->cowners[i-1];
  }
  aij->cstart = aij->cowners[aij->mytid];
  aij->cend   = aij->cowners[aij->mytid+1];

  ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&aij->A);
  CHKERRQ(ierr);
  PLogObjectParent(mat,aij->A);
  ierr = MatCreateSequentialAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&aij->B);
  CHKERRQ(ierr);
  PLogObjectParent(mat,aij->B);

  /* build cache for off-processor entries formed during MatSetValues() */
  ierr = StashBuild_Private(&aij->stash); CHKERRQ(ierr);
  aij->colmap = 0;
  aij->garray = 0;

  /* stuff used for matrix-vector multiply */
  aij->lvec = 0;
  aij->Mvctx = 0;
  aij->assembled = 0;

  *newmat = mat;
  return 0;
}

static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat)
{
  Mat mat;
  Mat_MPIAIJ *aij,*oldmat = (Mat_MPIAIJ *) matin->data;
  int ierr, len;
  *newmat = 0;

  if (!oldmat->assembled) SETERRQ(1,"Cannot copy unassembled matrix");
  PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data = (void *) (aij = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(aij);
  mat->ops = &MatOps;
  mat->destroy = MatDestroy_MPIAIJ;
  mat->view = MatView_MPIAIJ;
  mat->factor = matin->factor;

  aij->m = oldmat->m;
  aij->n = oldmat->n;
  aij->M = oldmat->M;
  aij->N = oldmat->N;

  aij->assembled = 1;
  aij->rstart = oldmat->rstart;
  aij->rend = oldmat->rend;
  aij->cstart = oldmat->cstart;
  aij->cend = oldmat->cend;
  aij->numtids = oldmat->numtids;
  aij->mytid = oldmat->mytid;
  aij->insertmode = NOTSETVALUES;

  aij->rowners = (int *) PETSCMALLOC( (aij->numtids+1)*sizeof(int) );
  CHKPTRQ(aij->rowners);
  PETSCMEMCPY(aij->rowners,oldmat->rowners,(aij->numtids+1)*sizeof(int));
  ierr = StashInitialize_Private(&aij->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    aij->colmap = (int *) PETSCMALLOC( (aij->N)*sizeof(int) );
    CHKPTRQ(aij->colmap);
    PETSCMEMCPY(aij->colmap,oldmat->colmap,(aij->N)*sizeof(int));
  } else aij->colmap = 0;
  if (oldmat->garray && (len = ((Mat_AIJ *) (oldmat->B->data))->n)) {
    aij->garray = (int *) PETSCMALLOC(len*sizeof(int) ); CHKPTRQ(aij->garray);
    PETSCMEMCPY(aij->garray,oldmat->garray,len*sizeof(int));
  } else aij->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&aij->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,aij->lvec);
  ierr = VecScatterCtxCopy(oldmat->Mvctx,&aij->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,aij->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&aij->A); CHKERRQ(ierr);
  PLogObjectParent(mat,aij->A);
  ierr = MatConvert(oldmat->B,MATSAME,&aij->B); CHKERRQ(ierr);
  PLogObjectParent(mat,aij->B);
  *newmat = mat;
  return 0;
}
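
/*
   Illustrative usage sketch: one plausible way a caller might drive
   MatCreateMPIAIJ() and the assembly routines defined above -- create a
   distributed tridiagonal matrix, insert the locally owned rows, assemble,
   and clean up.  The matrix size N, the preallocation figures, and the
   guard macro MPIAIJ_USAGE_EXAMPLE are assumptions made only for this
   example; the block is never compiled unless that macro is defined.
*/
#ifdef MPIAIJ_USAGE_EXAMPLE
static int MPIAIJUsageExample(MPI_Comm comm)
{
  Mat C;
  int ierr,i,rstart,rend,col[3],N = 100;
  Scalar v[3];

  /* let PETSc choose the local sizes; preallocate roughly 3 nonzeros per
     row in the diagonal block and 1 in the off-diagonal block */
  ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,N,N,3,0,1,0,&C);
  CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(C,&rstart,&rend); CHKERRQ(ierr);

  for ( i=rstart; i<rend; i++ ) {
    if (i == 0) {
      col[0] = 0; col[1] = 1; v[0] = 2.0; v[1] = -1.0;
      ierr = MatSetValues(C,1,&i,2,col,v,INSERTVALUES); CHKERRQ(ierr);
    }
    else if (i == N-1) {
      col[0] = N-2; col[1] = N-1; v[0] = -1.0; v[1] = 2.0;
      ierr = MatSetValues(C,1,&i,2,col,v,INSERTVALUES); CHKERRQ(ierr);
    }
    else {
      col[0] = i-1; col[1] = i; col[2] = i+1;
      v[0] = -1.0; v[1] = 2.0; v[2] = -1.0;
      ierr = MatSetValues(C,1,&i,3,col,v,INSERTVALUES); CHKERRQ(ierr);
    }
  }
  /* values destined for other processors were stashed by MatSetValues();
     the assembly routines above ship them to their owning processors */
  ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr);

  ierr = MatDestroy(C); CHKERRQ(ierr);
  return 0;
}
#endif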