#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.186 1997/01/14 17:18:18 balay Exp balay $";
#endif

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way, since the
   length of colmap equals the global matrix length.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i;

  aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  return 0;
}

extern int DisAssemble_MPIAIJ(Mat);

#undef __FUNC__
#define __FUNC__ "MatGetRowIJ_MPIAIJ"
static int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
                              PetscTruth *done)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
  } else SETERRQ(1,0,"not supported in parallel");
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRowIJ_MPIAIJ"
static int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
                                  PetscTruth *done)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
  } else SETERRQ(1,0,"not supported in parallel");
  return 0;
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(A,row,col,value,addv) \
{ \
  Mat_SeqAIJ *a = (Mat_SeqAIJ *) (A)->data; \
  int        *rp,ii,nrow,_i,rmax, N, col1; \
  int        *imax = a->imax, *ai = a->i, *ailen = a->ilen; \
  int        *aj = a->j, nonew = a->nonew,shift = a->indexshift; \
  Scalar     *ap, *aa = a->a; \
 \
  rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
  rmax = imax[row]; nrow = ailen[row]; \
  col1 = col - shift; \
 \
  for ( _i=0; _i<nrow; _i++ ) { \
    if (rp[_i] > col1) break; \
    if (rp[_i] == col1) { \
      if (addv == ADD_VALUES) ap[_i] += value; \
      else                    ap[_i] = value; \
      goto _noinsert; \
    } \
  } \
  if (nonew) goto _noinsert; \
  if (nrow >= rmax) { \
    /* there is no extra room in row, therefore enlarge */ \
    int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
    Scalar *new_a; \
 \
    /* malloc new storage space */ \
    len   = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
    new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
    new_j = (int *) (new_a + new_nz); \
    new_i = new_j + new_nz; \
 \
    /* copy over old data into new slots */ \
    for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
    for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
    PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
    len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
    PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                len*sizeof(int)); \
    PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
    PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                len*sizeof(Scalar)); \
    /* free up old matrix
storage */ \ 101 \ 102 PetscFree(a->a); \ 103 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 104 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 105 a->singlemalloc = 1; \ 106 \ 107 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 108 rmax = imax[row] = imax[row] + CHUNKSIZE; \ 109 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 110 a->maxnz += CHUNKSIZE; \ 111 a->reallocs++; \ 112 } \ 113 N = nrow++ - 1; a->nz++; \ 114 /* shift up all the later entries in this row */ \ 115 for ( ii=N; ii>=_i; ii-- ) { \ 116 rp[ii+1] = rp[ii]; \ 117 ap[ii+1] = ap[ii]; \ 118 } \ 119 rp[_i] = col1; \ 120 ap[_i] = value; \ 121 _noinsert: ; \ 122 ailen[row] = nrow; \ 123 } 124 125 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 126 #undef __FUNC__ 127 #define __FUNC__ "MatSetValues_MPIAIJ" 128 static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 129 { 130 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 131 Scalar value; 132 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 133 int cstart = aij->cstart, cend = aij->cend,row,col; 134 int roworiented = aij->roworiented; 135 136 /* Some Variables required in the macro */ 137 /* Mat A = aij->A; 138 Mat_SeqAIJ *a = (Mat_SeqAIJ *) (A)->data; 139 int *rp,ii,nrow,_i,rmax,N,col2; 140 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 141 int *aj=a->j,shift=a->indexshift; 142 Scalar *ap,*aa=a->a; 143 */ 144 #if defined(PETSC_BOPT_g) 145 if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 146 SETERRQ(1,0,"Cannot mix inserts and adds"); 147 } 148 #endif 149 aij->insertmode = addv; 150 for ( i=0; i<m; i++ ) { 151 #if defined(PETSC_BOPT_g) 152 if (im[i] < 0) SETERRQ(1,0,"Negative row"); 153 if (im[i] >= aij->M) SETERRQ(1,0,"Row too large"); 154 #endif 155 if (im[i] >= rstart && im[i] < rend) { 156 row = im[i] - rstart; 157 for ( j=0; j<n; j++ ) { 158 if (in[j] >= cstart && in[j] < cend){ 159 col = in[j] - cstart; 160 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 161 MatSetValues_SeqAIJ_A_Private(aij->A,row,col,value,addv); 162 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 163 } 164 #if defined(PETSC_BOPT_g) 165 else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 166 else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");} 167 #endif 168 else { 169 if (mat->was_assembled) { 170 if (!aij->colmap) { 171 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 172 } 173 col = aij->colmap[in[j]] - 1; 174 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 175 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 176 col = in[j]; 177 } 178 } 179 else col = in[j]; 180 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 181 ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 182 } 183 } 184 } 185 else { 186 if (roworiented && !aij->donotstash) { 187 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 188 } 189 else { 190 if (!aij->donotstash) { 191 row = im[i]; 192 for ( j=0; j<n; j++ ) { 193 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 194 } 195 } 196 } 197 } 198 } 199 return 0; 200 } 201 202 #undef __FUNC__ 203 #define __FUNC__ "MatGetValues_MPIAIJ" 204 static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 205 { 206 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 207 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 208 int cstart = aij->cstart, cend = aij->cend,row,col; 209 210 for ( i=0; i<m; i++ ) { 
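    /*
       Added note (comment only, not in the original source): the column lookups a
       few lines below use two small tables kept in Mat_MPIAIJ.  garray[l] is the
       global column number of local column l of the off-diagonal block B, and
       colmap[g] (built lazily by CreateColmap_MPIAIJ_Private() above) is 1 + the
       local column of global column g in B, or 0 if that global column has no
       entries in B on this processor; hence the "- 1" and the "col < 0" tests.
    */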
211 if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 212 if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large"); 213 if (idxm[i] >= rstart && idxm[i] < rend) { 214 row = idxm[i] - rstart; 215 for ( j=0; j<n; j++ ) { 216 if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 217 if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large"); 218 if (idxn[j] >= cstart && idxn[j] < cend){ 219 col = idxn[j] - cstart; 220 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 221 } 222 else { 223 if (!aij->colmap) { 224 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 225 } 226 col = aij->colmap[idxn[j]] - 1; 227 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 228 else { 229 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 230 } 231 } 232 } 233 } 234 else { 235 SETERRQ(1,0,"Only local values currently supported"); 236 } 237 } 238 return 0; 239 } 240 241 #undef __FUNC__ 242 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 243 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 244 { 245 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 246 MPI_Comm comm = mat->comm; 247 int size = aij->size, *owners = aij->rowners; 248 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 249 MPI_Request *send_waits,*recv_waits; 250 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 251 InsertMode addv; 252 Scalar *rvalues,*svalues; 253 254 /* make sure all processors are either in INSERTMODE or ADDMODE */ 255 MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 256 if (addv == (ADD_VALUES|INSERT_VALUES)) { 257 SETERRQ(1,0,"Some processors inserted others added"); 258 } 259 aij->insertmode = addv; /* in case this processor had no cache */ 260 261 /* first count number of contributors to each processor */ 262 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 263 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 264 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 265 for ( i=0; i<aij->stash.n; i++ ) { 266 idx = aij->stash.idx[i]; 267 for ( j=0; j<size; j++ ) { 268 if (idx >= owners[j] && idx < owners[j+1]) { 269 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 270 } 271 } 272 } 273 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 274 275 /* inform other processors of number of messages and max length*/ 276 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 277 MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 278 nreceives = work[rank]; 279 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 280 nmax = work[rank]; 281 PetscFree(work); 282 283 /* post receives: 284 1) each message will consist of ordered pairs 285 (global index,value) we store the global index as a double 286 to simplify the message passing. 287 2) since we don't know how long each individual message is we 288 allocate the largest needed buffer for each receive. Potentially 289 this is a lot of wasted space. 290 291 292 This could be done better. 
293 */ 294 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 295 CHKPTRQ(rvalues); 296 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 297 CHKPTRQ(recv_waits); 298 for ( i=0; i<nreceives; i++ ) { 299 MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 300 comm,recv_waits+i); 301 } 302 303 /* do sends: 304 1) starts[i] gives the starting index in svalues for stuff going to 305 the ith processor 306 */ 307 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 308 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 309 CHKPTRQ(send_waits); 310 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 311 starts[0] = 0; 312 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 313 for ( i=0; i<aij->stash.n; i++ ) { 314 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 315 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 316 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 317 } 318 PetscFree(owner); 319 starts[0] = 0; 320 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 321 count = 0; 322 for ( i=0; i<size; i++ ) { 323 if (procs[i]) { 324 MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 325 comm,send_waits+count++); 326 } 327 } 328 PetscFree(starts); PetscFree(nprocs); 329 330 /* Free cache space */ 331 PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 332 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 333 334 aij->svalues = svalues; aij->rvalues = rvalues; 335 aij->nsends = nsends; aij->nrecvs = nreceives; 336 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 337 aij->rmax = nmax; 338 339 return 0; 340 } 341 extern int MatSetUpMultiply_MPIAIJ(Mat); 342 343 #undef __FUNC__ 344 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 345 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 346 { 347 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 348 MPI_Status *send_status,recv_status; 349 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 350 int row,col,other_disassembled; 351 Scalar *values,val; 352 InsertMode addv = aij->insertmode; 353 354 /* wait on receives */ 355 while (count) { 356 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 357 /* unpack receives into our local space */ 358 values = aij->rvalues + 3*imdex*aij->rmax; 359 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 360 n = n/3; 361 for ( i=0; i<n; i++ ) { 362 row = (int) PetscReal(values[3*i]) - aij->rstart; 363 col = (int) PetscReal(values[3*i+1]); 364 val = values[3*i+2]; 365 if (col >= aij->cstart && col < aij->cend) { 366 col -= aij->cstart; 367 MatSetValues(aij->A,1,&row,1,&col,&val,addv); 368 } 369 else { 370 if (mat->was_assembled || mat->assembled) { 371 if (!aij->colmap) { 372 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 373 } 374 col = aij->colmap[col] - 1; 375 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 376 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 377 col = (int) PetscReal(values[3*i+1]); 378 } 379 } 380 MatSetValues(aij->B,1,&row,1,&col,&val,addv); 381 } 382 } 383 count--; 384 } 385 PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 386 387 /* wait on sends */ 388 if (aij->nsends) { 389 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 390 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 391 PetscFree(send_status); 392 } 393 PetscFree(aij->send_waits); PetscFree(aij->svalues); 
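  /*
     Added note (not in the original source): the stashed off-processor entries
     exchanged by MatAssemblyBegin/MatAssemblyEnd above travel as triples of
     MPIU_SCALAR values -- (global row, global column, value) -- with the integer
     indices stored as Scalars so each message is a single homogeneous buffer.
     A minimal sketch of the packing and unpacking, using hypothetical values:
  */
#if 0
  {
    Scalar buf[3],val;
    int    grow = 10,gcol = 42;        /* hypothetical global row and column   */
    Scalar value = 3.0;                /* hypothetical matrix entry            */

    buf[0] = (Scalar) grow;            /* pack: indices are cast to Scalar     */
    buf[1] = (Scalar) gcol;
    buf[2] = value;

    grow = (int) PetscReal(buf[0]);    /* unpack on the owning processor       */
    gcol = (int) PetscReal(buf[1]);
    val  = buf[2];
  }
#endif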

  aij->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIAIJ"
static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIAIJ"
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc(
(N+1)*sizeof(int) ); CHKPTRQ(svalues); 489 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 490 CHKPTRQ(send_waits); 491 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 492 starts[0] = 0; 493 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 494 for ( i=0; i<N; i++ ) { 495 svalues[starts[owner[i]]++] = rows[i]; 496 } 497 ISRestoreIndices(is,&rows); 498 499 starts[0] = 0; 500 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 501 count = 0; 502 for ( i=0; i<size; i++ ) { 503 if (procs[i]) { 504 MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 505 } 506 } 507 PetscFree(starts); 508 509 base = owners[rank]; 510 511 /* wait on receives */ 512 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 513 source = lens + nrecvs; 514 count = nrecvs; slen = 0; 515 while (count) { 516 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 517 /* unpack receives into our local space */ 518 MPI_Get_count(&recv_status,MPI_INT,&n); 519 source[imdex] = recv_status.MPI_SOURCE; 520 lens[imdex] = n; 521 slen += n; 522 count--; 523 } 524 PetscFree(recv_waits); 525 526 /* move the data into the send scatter */ 527 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 528 count = 0; 529 for ( i=0; i<nrecvs; i++ ) { 530 values = rvalues + i*nmax; 531 for ( j=0; j<lens[i]; j++ ) { 532 lrows[count++] = values[j] - base; 533 } 534 } 535 PetscFree(rvalues); PetscFree(lens); 536 PetscFree(owner); PetscFree(nprocs); 537 538 /* actually zap the local rows */ 539 ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 540 PLogObjectParent(A,istmp); 541 PetscFree(lrows); 542 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 543 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 544 ierr = ISDestroy(istmp); CHKERRQ(ierr); 545 546 /* wait on sends */ 547 if (nsends) { 548 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 549 CHKPTRQ(send_status); 550 MPI_Waitall(nsends,send_waits,send_status); 551 PetscFree(send_status); 552 } 553 PetscFree(send_waits); PetscFree(svalues); 554 555 return 0; 556 } 557 558 #undef __FUNC__ 559 #define __FUNC__ "MatMult_MPIAIJ" 560 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 561 { 562 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 563 int ierr,nt; 564 565 ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 566 if (nt != a->n) { 567 SETERRQ(1,0,"Incompatible parition of A and xx"); 568 } 569 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 570 ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 571 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 572 ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 573 return 0; 574 } 575 576 #undef __FUNC__ 577 #define __FUNC__ "MatMultAdd_MPIAIJ" 578 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 579 { 580 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 581 int ierr; 582 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 583 ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 584 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 585 ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 586 return 0; 587 } 588 589 #undef __FUNC__ 590 #define __FUNC__ "MatMultTrans_MPIAIJ" 591 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 592 { 593 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 594 int ierr; 595 596 /* do 
nondiagonal part */ 597 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 598 /* send it on its way */ 599 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 600 /* do local part */ 601 ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 602 /* receive remote parts: note this assumes the values are not actually */ 603 /* inserted in yy until the next line, which is true for my implementation*/ 604 /* but is not perhaps always true. */ 605 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 606 return 0; 607 } 608 609 #undef __FUNC__ 610 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 611 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 612 { 613 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 614 int ierr; 615 616 /* do nondiagonal part */ 617 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 618 /* send it on its way */ 619 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 620 /* do local part */ 621 ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 622 /* receive remote parts: note this assumes the values are not actually */ 623 /* inserted in yy until the next line, which is true for my implementation*/ 624 /* but is not perhaps always true. */ 625 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 626 return 0; 627 } 628 629 /* 630 This only works correctly for square matrices where the subblock A->A is the 631 diagonal block 632 */ 633 #undef __FUNC__ 634 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 635 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 636 { 637 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 638 if (a->M != a->N) 639 SETERRQ(1,0,"Supports only square matrix where A->A is diag block"); 640 if (a->rstart != a->cstart || a->rend != a->cend) { 641 SETERRQ(1,0,"row partition must equal col partition"); } 642 return MatGetDiagonal(a->A,v); 643 } 644 645 #undef __FUNC__ 646 #define __FUNC__ "MatScale_MPIAIJ" 647 static int MatScale_MPIAIJ(Scalar *aa,Mat A) 648 { 649 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 650 int ierr; 651 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 652 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 653 return 0; 654 } 655 656 #undef __FUNC__ 657 #define __FUNC__ "MatDestroy_MPIAIJ" 658 static int MatDestroy_MPIAIJ(PetscObject obj) 659 { 660 Mat mat = (Mat) obj; 661 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 662 int ierr; 663 #if defined(PETSC_LOG) 664 PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 665 #endif 666 PetscFree(aij->rowners); 667 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 668 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 669 if (aij->colmap) PetscFree(aij->colmap); 670 if (aij->garray) PetscFree(aij->garray); 671 if (aij->lvec) VecDestroy(aij->lvec); 672 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 673 if (aij->rowvalues) PetscFree(aij->rowvalues); 674 PetscFree(aij); 675 if (mat->mapping) { 676 ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 677 } 678 PLogObjectDestroy(mat); 679 PetscHeaderDestroy(mat); 680 return 0; 681 } 682 #include "draw.h" 683 #include "pinclude/pviewer.h" 684 685 #undef __FUNC__ 686 #define __FUNC__ "MatView_MPIAIJ_Binary" 687 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 688 { 689 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 690 int ierr; 691 692 if (aij->size == 1) { 693 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 694 } 695 else SETERRQ(1,0,"Only uniprocessor output supported"); 696 return 0; 697 } 698 699 #undef __FUNC__ 700 
#define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" 701 static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 702 { 703 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 704 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 705 int ierr, format,shift = C->indexshift,rank; 706 FILE *fd; 707 ViewerType vtype; 708 709 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 710 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 711 ierr = ViewerGetFormat(viewer,&format); 712 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 713 MatInfo info; 714 int flg; 715 MPI_Comm_rank(mat->comm,&rank); 716 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 717 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 718 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 719 PetscSequentialPhaseBegin(mat->comm,1); 720 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 721 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 722 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 723 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 724 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 725 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 726 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 727 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 728 fflush(fd); 729 PetscSequentialPhaseEnd(mat->comm,1); 730 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 731 return 0; 732 } 733 else if (format == VIEWER_FORMAT_ASCII_INFO) { 734 return 0; 735 } 736 } 737 738 if (vtype == DRAW_VIEWER) { 739 Draw draw; 740 PetscTruth isnull; 741 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 742 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 743 } 744 745 if (vtype == ASCII_FILE_VIEWER) { 746 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 747 PetscSequentialPhaseBegin(mat->comm,1); 748 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 749 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 750 aij->cend); 751 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 752 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 753 fflush(fd); 754 PetscSequentialPhaseEnd(mat->comm,1); 755 } 756 else { 757 int size = aij->size; 758 rank = aij->rank; 759 if (size == 1) { 760 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 761 } 762 else { 763 /* assemble the entire matrix onto first processor. 
*/ 764 Mat A; 765 Mat_SeqAIJ *Aloc; 766 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 767 Scalar *a; 768 769 if (!rank) { 770 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 771 CHKERRQ(ierr); 772 } 773 else { 774 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 775 CHKERRQ(ierr); 776 } 777 PLogObjectParent(mat,A); 778 779 /* copy over the A part */ 780 Aloc = (Mat_SeqAIJ*) aij->A->data; 781 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 782 row = aij->rstart; 783 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 784 for ( i=0; i<m; i++ ) { 785 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 786 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 787 } 788 aj = Aloc->j; 789 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 790 791 /* copy over the B part */ 792 Aloc = (Mat_SeqAIJ*) aij->B->data; 793 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 794 row = aij->rstart; 795 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 796 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 797 for ( i=0; i<m; i++ ) { 798 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 799 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 800 } 801 PetscFree(ct); 802 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 803 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 804 if (!rank) { 805 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 806 } 807 ierr = MatDestroy(A); CHKERRQ(ierr); 808 } 809 } 810 return 0; 811 } 812 813 #undef __FUNC__ 814 #define __FUNC__ "MatView_MPIAIJ" 815 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 816 { 817 Mat mat = (Mat) obj; 818 int ierr; 819 ViewerType vtype; 820 821 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 822 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 823 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 824 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 825 } 826 else if (vtype == BINARY_FILE_VIEWER) { 827 return MatView_MPIAIJ_Binary(mat,viewer); 828 } 829 return 0; 830 } 831 832 /* 833 This has to provide several versions. 834 835 2) a) use only local smoothing updating outer values only once. 836 b) local smoothing updating outer values each inner iteration 837 3) color updating out values betwen colors. 
838 */ 839 #undef __FUNC__ 840 #define __FUNC__ "MatRelax_MPIAIJ" 841 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 842 double fshift,int its,Vec xx) 843 { 844 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 845 Mat AA = mat->A, BB = mat->B; 846 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 847 Scalar *b,*x,*xs,*ls,d,*v,sum; 848 int ierr,*idx, *diag; 849 int n = mat->n, m = mat->m, i,shift = A->indexshift; 850 851 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 852 xs = x + shift; /* shift by one for index start of 1 */ 853 ls = ls + shift; 854 if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 855 diag = A->diag; 856 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 857 if (flag & SOR_ZERO_INITIAL_GUESS) { 858 return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 859 } 860 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 861 CHKERRQ(ierr); 862 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 863 CHKERRQ(ierr); 864 while (its--) { 865 /* go down through the rows */ 866 for ( i=0; i<m; i++ ) { 867 n = A->i[i+1] - A->i[i]; 868 idx = A->j + A->i[i] + shift; 869 v = A->a + A->i[i] + shift; 870 sum = b[i]; 871 SPARSEDENSEMDOT(sum,xs,v,idx,n); 872 d = fshift + A->a[diag[i]+shift]; 873 n = B->i[i+1] - B->i[i]; 874 idx = B->j + B->i[i] + shift; 875 v = B->a + B->i[i] + shift; 876 SPARSEDENSEMDOT(sum,ls,v,idx,n); 877 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 878 } 879 /* come up through the rows */ 880 for ( i=m-1; i>-1; i-- ) { 881 n = A->i[i+1] - A->i[i]; 882 idx = A->j + A->i[i] + shift; 883 v = A->a + A->i[i] + shift; 884 sum = b[i]; 885 SPARSEDENSEMDOT(sum,xs,v,idx,n); 886 d = fshift + A->a[diag[i]+shift]; 887 n = B->i[i+1] - B->i[i]; 888 idx = B->j + B->i[i] + shift; 889 v = B->a + B->i[i] + shift; 890 SPARSEDENSEMDOT(sum,ls,v,idx,n); 891 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 892 } 893 } 894 } 895 else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 896 if (flag & SOR_ZERO_INITIAL_GUESS) { 897 return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 898 } 899 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 900 CHKERRQ(ierr); 901 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 902 CHKERRQ(ierr); 903 while (its--) { 904 for ( i=0; i<m; i++ ) { 905 n = A->i[i+1] - A->i[i]; 906 idx = A->j + A->i[i] + shift; 907 v = A->a + A->i[i] + shift; 908 sum = b[i]; 909 SPARSEDENSEMDOT(sum,xs,v,idx,n); 910 d = fshift + A->a[diag[i]+shift]; 911 n = B->i[i+1] - B->i[i]; 912 idx = B->j + B->i[i] + shift; 913 v = B->a + B->i[i] + shift; 914 SPARSEDENSEMDOT(sum,ls,v,idx,n); 915 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 916 } 917 } 918 } 919 else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 920 if (flag & SOR_ZERO_INITIAL_GUESS) { 921 return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 922 } 923 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 924 mat->Mvctx); CHKERRQ(ierr); 925 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 926 mat->Mvctx); CHKERRQ(ierr); 927 while (its--) { 928 for ( i=m-1; i>-1; i-- ) { 929 n = A->i[i+1] - A->i[i]; 930 idx = A->j + A->i[i] + shift; 931 v = A->a + A->i[i] + shift; 932 sum = b[i]; 933 SPARSEDENSEMDOT(sum,xs,v,idx,n); 934 d = fshift + A->a[diag[i]+shift]; 935 n = B->i[i+1] - B->i[i]; 936 idx = B->j + B->i[i] + shift; 937 v = B->a + B->i[i] + shift; 938 SPARSEDENSEMDOT(sum,ls,v,idx,n); 939 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 940 } 941 } 942 } 943 else { 944 SETERRQ(1,0,"Option not supported"); 945 } 946 return 0; 947 } 948 949 #undef __FUNC__ 950 #define __FUNC__ "MatGetInfo_MPIAIJ" 951 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 952 { 953 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 954 Mat A = mat->A, B = mat->B; 955 int ierr; 956 double isend[5], irecv[5]; 957 958 info->rows_global = (double)mat->M; 959 info->columns_global = (double)mat->N; 960 info->rows_local = (double)mat->m; 961 info->columns_local = (double)mat->N; 962 info->block_size = 1.0; 963 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 964 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 965 isend[3] = info->memory; isend[4] = info->mallocs; 966 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 967 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 968 isend[3] += info->memory; isend[4] += info->mallocs; 969 if (flag == MAT_LOCAL) { 970 info->nz_used = isend[0]; 971 info->nz_allocated = isend[1]; 972 info->nz_unneeded = isend[2]; 973 info->memory = isend[3]; 974 info->mallocs = isend[4]; 975 } else if (flag == MAT_GLOBAL_MAX) { 976 MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm); 977 info->nz_used = irecv[0]; 978 info->nz_allocated = irecv[1]; 979 info->nz_unneeded = irecv[2]; 980 info->memory = irecv[3]; 981 info->mallocs = irecv[4]; 982 } else if (flag == MAT_GLOBAL_SUM) { 983 MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm); 984 info->nz_used = irecv[0]; 985 info->nz_allocated = irecv[1]; 986 info->nz_unneeded = irecv[2]; 987 info->memory = irecv[3]; 988 info->mallocs = irecv[4]; 989 } 990 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 991 info->fill_ratio_needed = 0; 992 info->factor_mallocs = 0; 993 994 return 0; 995 } 996 997 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 998 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 999 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 1000 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 1001 extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 1002 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1003 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 1004 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1005 1006 #undef __FUNC__ 1007 #define __FUNC__ "MatSetOption_MPIAIJ" 1008 static int MatSetOption_MPIAIJ(Mat A,MatOption op) 1009 { 1010 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1011 1012 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1013 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1014 op == MAT_COLUMNS_UNSORTED || 1015 op == MAT_COLUMNS_SORTED) { 1016 
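    /*
       Added note (comment only, not in the original source): these options are
       simply forwarded to both sequential blocks, the diagonal part a->A and the
       off-diagonal part a->B.  Callers that insert rows with presorted column
       indices can set MAT_COLUMNS_SORTED once on the parallel matrix, as
       MatLoad_MPIAIJ() below does before its MatSetValues() loop.
    */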
MatSetOption(a->A,op); 1017 MatSetOption(a->B,op); 1018 } else if (op == MAT_ROW_ORIENTED) { 1019 a->roworiented = 1; 1020 MatSetOption(a->A,op); 1021 MatSetOption(a->B,op); 1022 } else if (op == MAT_ROWS_SORTED || 1023 op == MAT_ROWS_UNSORTED || 1024 op == MAT_SYMMETRIC || 1025 op == MAT_STRUCTURALLY_SYMMETRIC || 1026 op == MAT_YES_NEW_DIAGONALS) 1027 PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 1028 else if (op == MAT_COLUMN_ORIENTED) { 1029 a->roworiented = 0; 1030 MatSetOption(a->A,op); 1031 MatSetOption(a->B,op); 1032 } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) { 1033 a->donotstash = 1; 1034 } else if (op == MAT_NO_NEW_DIAGONALS) 1035 {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 1036 else 1037 {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 1038 return 0; 1039 } 1040 1041 #undef __FUNC__ 1042 #define __FUNC__ "MatGetSize_MPIAIJ" 1043 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1044 { 1045 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1046 *m = mat->M; *n = mat->N; 1047 return 0; 1048 } 1049 1050 #undef __FUNC__ 1051 #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1052 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1053 { 1054 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1055 *m = mat->m; *n = mat->N; 1056 return 0; 1057 } 1058 1059 #undef __FUNC__ 1060 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1061 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1062 { 1063 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1064 *m = mat->rstart; *n = mat->rend; 1065 return 0; 1066 } 1067 1068 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1069 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1070 1071 #undef __FUNC__ 1072 #define __FUNC__ "MatGetRow_MPIAIJ" 1073 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1074 { 1075 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1076 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1077 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1078 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1079 int *cmap, *idx_p; 1080 1081 if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active"); 1082 mat->getrowactive = PETSC_TRUE; 1083 1084 if (!mat->rowvalues && (idx || v)) { 1085 /* 1086 allocate enough space to hold information from the longest row. 
1087 */ 1088 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1089 int max = 1,m = mat->m,tmp; 1090 for ( i=0; i<m; i++ ) { 1091 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1092 if (max < tmp) { max = tmp; } 1093 } 1094 mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 1095 CHKPTRQ(mat->rowvalues); 1096 mat->rowindices = (int *) (mat->rowvalues + max); 1097 } 1098 1099 if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows") 1100 lrow = row - rstart; 1101 1102 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1103 if (!v) {pvA = 0; pvB = 0;} 1104 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1105 ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1106 ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1107 nztot = nzA + nzB; 1108 1109 cmap = mat->garray; 1110 if (v || idx) { 1111 if (nztot) { 1112 /* Sort by increasing column numbers, assuming A and B already sorted */ 1113 int imark = -1; 1114 if (v) { 1115 *v = v_p = mat->rowvalues; 1116 for ( i=0; i<nzB; i++ ) { 1117 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1118 else break; 1119 } 1120 imark = i; 1121 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1122 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1123 } 1124 if (idx) { 1125 *idx = idx_p = mat->rowindices; 1126 if (imark > -1) { 1127 for ( i=0; i<imark; i++ ) { 1128 idx_p[i] = cmap[cworkB[i]]; 1129 } 1130 } else { 1131 for ( i=0; i<nzB; i++ ) { 1132 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1133 else break; 1134 } 1135 imark = i; 1136 } 1137 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1138 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1139 } 1140 } 1141 else { 1142 if (idx) *idx = 0; 1143 if (v) *v = 0; 1144 } 1145 } 1146 *nz = nztot; 1147 ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1148 ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1149 return 0; 1150 } 1151 1152 #undef __FUNC__ 1153 #define __FUNC__ "MatRestoreRow_MPIAIJ" 1154 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1155 { 1156 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1157 if (aij->getrowactive == PETSC_FALSE) { 1158 SETERRQ(1,0,"MatGetRow not called"); 1159 } 1160 aij->getrowactive = PETSC_FALSE; 1161 return 0; 1162 } 1163 1164 #undef __FUNC__ 1165 #define __FUNC__ "MatNorm_MPIAIJ" 1166 static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1167 { 1168 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1169 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1170 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1171 double sum = 0.0; 1172 Scalar *v; 1173 1174 if (aij->size == 1) { 1175 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1176 } else { 1177 if (type == NORM_FROBENIUS) { 1178 v = amat->a; 1179 for (i=0; i<amat->nz; i++ ) { 1180 #if defined(PETSC_COMPLEX) 1181 sum += real(conj(*v)*(*v)); v++; 1182 #else 1183 sum += (*v)*(*v); v++; 1184 #endif 1185 } 1186 v = bmat->a; 1187 for (i=0; i<bmat->nz; i++ ) { 1188 #if defined(PETSC_COMPLEX) 1189 sum += real(conj(*v)*(*v)); v++; 1190 #else 1191 sum += (*v)*(*v); v++; 1192 #endif 1193 } 1194 MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 1195 *norm = sqrt(*norm); 1196 } 1197 else if (type == NORM_1) { /* max column norm */ 1198 double *tmp, *tmp2; 1199 int *jj, *garray = aij->garray; 1200 tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 1201 
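      /*
         Added note (comment only, not in the original source): the 1-norm is the
         maximum column sum, ||A||_1 = max_j sum_i |a_ij|.  Each processor
         accumulates |a_ij| over its local rows into a length-N array indexed by
         global column -- columns of the diagonal block are offset by cstart,
         columns of the off-diagonal block are mapped through garray -- and the
         per-column sums are then combined with MPI_SUM before taking the maximum.
      */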
tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1202 PetscMemzero(tmp,aij->N*sizeof(double)); 1203 *norm = 0.0; 1204 v = amat->a; jj = amat->j; 1205 for ( j=0; j<amat->nz; j++ ) { 1206 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1207 } 1208 v = bmat->a; jj = bmat->j; 1209 for ( j=0; j<bmat->nz; j++ ) { 1210 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1211 } 1212 MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 1213 for ( j=0; j<aij->N; j++ ) { 1214 if (tmp2[j] > *norm) *norm = tmp2[j]; 1215 } 1216 PetscFree(tmp); PetscFree(tmp2); 1217 } 1218 else if (type == NORM_INFINITY) { /* max row norm */ 1219 double ntemp = 0.0; 1220 for ( j=0; j<amat->m; j++ ) { 1221 v = amat->a + amat->i[j] + shift; 1222 sum = 0.0; 1223 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1224 sum += PetscAbsScalar(*v); v++; 1225 } 1226 v = bmat->a + bmat->i[j] + shift; 1227 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1228 sum += PetscAbsScalar(*v); v++; 1229 } 1230 if (sum > ntemp) ntemp = sum; 1231 } 1232 MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 1233 } 1234 else { 1235 SETERRQ(1,0,"No support for two norm"); 1236 } 1237 } 1238 return 0; 1239 } 1240 1241 #undef __FUNC__ 1242 #define __FUNC__ "MatTranspose_MPIAIJ" 1243 static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1244 { 1245 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1246 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1247 int ierr,shift = Aloc->indexshift; 1248 Mat B; 1249 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1250 Scalar *array; 1251 1252 if (matout == PETSC_NULL && M != N) 1253 SETERRQ(1,0,"Square matrix only for in-place"); 1254 ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1255 PETSC_NULL,&B); CHKERRQ(ierr); 1256 1257 /* copy over the A part */ 1258 Aloc = (Mat_SeqAIJ*) a->A->data; 1259 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1260 row = a->rstart; 1261 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1262 for ( i=0; i<m; i++ ) { 1263 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1264 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1265 } 1266 aj = Aloc->j; 1267 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1268 1269 /* copy over the B part */ 1270 Aloc = (Mat_SeqAIJ*) a->B->data; 1271 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1272 row = a->rstart; 1273 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1274 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1275 for ( i=0; i<m; i++ ) { 1276 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1277 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1278 } 1279 PetscFree(ct); 1280 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1281 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1282 if (matout != PETSC_NULL) { 1283 *matout = B; 1284 } else { 1285 /* This isn't really an in-place transpose .... 
but free data structures from a */ 1286 PetscFree(a->rowners); 1287 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1288 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1289 if (a->colmap) PetscFree(a->colmap); 1290 if (a->garray) PetscFree(a->garray); 1291 if (a->lvec) VecDestroy(a->lvec); 1292 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1293 PetscFree(a); 1294 PetscMemcpy(A,B,sizeof(struct _Mat)); 1295 PetscHeaderDestroy(B); 1296 } 1297 return 0; 1298 } 1299 1300 #undef __FUNC__ 1301 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1302 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1303 { 1304 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1305 Mat a = aij->A, b = aij->B; 1306 int ierr,s1,s2,s3; 1307 1308 ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 1309 if (rr) { 1310 s3 = aij->n; 1311 VecGetLocalSize_Fast(rr,s1); 1312 if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size"); 1313 /* Overlap communication with computation. */ 1314 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1315 } 1316 if (ll) { 1317 VecGetLocalSize_Fast(ll,s1); 1318 if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size"); 1319 ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr); 1320 } 1321 /* scale the diagonal block */ 1322 ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr); 1323 1324 if (rr) { 1325 /* Do a scatter end and then right scale the off-diagonal block */ 1326 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1327 ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 1328 } 1329 1330 return 0; 1331 } 1332 1333 1334 extern int MatPrintHelp_SeqAIJ(Mat); 1335 #undef __FUNC__ 1336 #define __FUNC__ "MatPrintHelp_MPIAIJ" 1337 static int MatPrintHelp_MPIAIJ(Mat A) 1338 { 1339 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1340 1341 if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1342 else return 0; 1343 } 1344 1345 #undef __FUNC__ 1346 #define __FUNC__ "MatGetBlockSize_MPIAIJ" 1347 static int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1348 { 1349 *bs = 1; 1350 return 0; 1351 } 1352 #undef __FUNC__ 1353 #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1354 static int MatSetUnfactored_MPIAIJ(Mat A) 1355 { 1356 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1357 int ierr; 1358 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1359 return 0; 1360 } 1361 1362 1363 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 1364 static int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 1365 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1366 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1367 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 1368 /* -------------------------------------------------------------------*/ 1369 static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 1370 MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 1371 MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 1372 MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1373 0,0, 1374 0,0, 1375 0,0, 1376 MatRelax_MPIAIJ, 1377 MatTranspose_MPIAIJ, 1378 MatGetInfo_MPIAIJ,0, 1379 MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ, 1380 MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 1381 0, 1382 MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1383 0,0,0,0, 1384 MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1385 0,0, 1386 0,0,MatConvert_MPIAIJ,MatConvertSameType_MPIAIJ,0,0, 1387 0,0,0, 1388 MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1389 MatPrintHelp_MPIAIJ, 1390 
                               MatScale_MPIAIJ,0,0,0,
                               MatGetBlockSize_MPIAIJ,0,0,0,0,
                               MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ};


#undef __FUNC__
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in the diagonal portion of the local submatrix
           (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           diagonal portion of the local submatrix (possibly different for each row)
           or PETSC_NULL.  You must leave room for the diagonal entry even if
           it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of the local
           submatrix (same for all local rows).
.  o_nnz - array containing the number of nonzeros in the various rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit.
$        (max limit=5)
$    -mat_aij_oneindex - Internally use indexing starting at 1
$        rather than 0.  Note: When calling MatSetValues(),
$        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

$             0 1 2 3 4 5 6 7 8 9 10 11
$            --------------------------
$    row 3  | o o o d d d o o o o o  o
$    row 4  | o o o d d d o o o o o  o
$    row 5  | o o o d d d o o o o o  o
$            --------------------------
$

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr, i,sum[2],work[2],size;

  *A = 0;
  PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(B);
  B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
  MPI_Comm_size(comm,&size);
  if (size == 1) {
    B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
    B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
    B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
    B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
    B->ops.lufactor          = MatLUFactor_MPIAIJ;
    B->ops.solve             = MatSolve_MPIAIJ;
    B->ops.solveadd          = MatSolveAdd_MPIAIJ;
    B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
    B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
    B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
  }
  B->destroy   = MatDestroy_MPIAIJ;
  B->view      = MatView_MPIAIJ;
  B->factor    = 0;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  b->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
    SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
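  /*
     Added note (comment only, not in the original source): the ownership tables
     are built by gathering every processor's local row (column) count and then
     forming a running sum, so that processor p owns global rows rowners[p]
     through rowners[p+1]-1.  For example (hypothetical sizes), with three
     processors holding m = 3, 2 and 4 local rows the gathered array is
     {0,3,2,4} and the running sum gives rowners = {0,3,5,9}.
  */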
MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 1546 b->rowners[0] = 0; 1547 for ( i=2; i<=b->size; i++ ) { 1548 b->rowners[i] += b->rowners[i-1]; 1549 } 1550 b->rstart = b->rowners[b->rank]; 1551 b->rend = b->rowners[b->rank+1]; 1552 MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 1553 b->cowners[0] = 0; 1554 for ( i=2; i<=b->size; i++ ) { 1555 b->cowners[i] += b->cowners[i-1]; 1556 } 1557 b->cstart = b->cowners[b->rank]; 1558 b->cend = b->cowners[b->rank+1]; 1559 1560 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1561 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1562 PLogObjectParent(B,b->A); 1563 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1564 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1565 PLogObjectParent(B,b->B); 1566 1567 /* build cache for off array entries formed */ 1568 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1569 b->donotstash = 0; 1570 b->colmap = 0; 1571 b->garray = 0; 1572 b->roworiented = 1; 1573 1574 /* stuff used for matrix vector multiply */ 1575 b->lvec = 0; 1576 b->Mvctx = 0; 1577 1578 /* stuff for MatGetRow() */ 1579 b->rowindices = 0; 1580 b->rowvalues = 0; 1581 b->getrowactive = PETSC_FALSE; 1582 1583 *A = B; 1584 return 0; 1585 } 1586 1587 #undef __FUNC__ 1588 #define __FUNC__ "MatConvertSameType_MPIAIJ" 1589 static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1590 { 1591 Mat mat; 1592 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1593 int ierr, len=0, flg; 1594 1595 *newmat = 0; 1596 PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1597 PLogObjectCreate(mat); 1598 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1599 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1600 mat->destroy = MatDestroy_MPIAIJ; 1601 mat->view = MatView_MPIAIJ; 1602 mat->factor = matin->factor; 1603 mat->assembled = PETSC_TRUE; 1604 1605 a->m = mat->m = oldmat->m; 1606 a->n = mat->n = oldmat->n; 1607 a->M = mat->M = oldmat->M; 1608 a->N = mat->N = oldmat->N; 1609 1610 a->rstart = oldmat->rstart; 1611 a->rend = oldmat->rend; 1612 a->cstart = oldmat->cstart; 1613 a->cend = oldmat->cend; 1614 a->size = oldmat->size; 1615 a->rank = oldmat->rank; 1616 a->insertmode = NOT_SET_VALUES; 1617 a->rowvalues = 0; 1618 a->getrowactive = PETSC_FALSE; 1619 1620 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1621 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1622 a->cowners = a->rowners + a->size + 2; 1623 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1624 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1625 if (oldmat->colmap) { 1626 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1627 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1628 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1629 } else a->colmap = 0; 1630 if (oldmat->garray) { 1631 len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 1632 a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1633 PLogObjectMemory(mat,len*sizeof(int)); 1634 if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1635 } else a->garray = 0; 1636 1637 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1638 PLogObjectParent(mat,a->lvec); 1639 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1640 PLogObjectParent(mat,a->Mvctx); 1641 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1642 PLogObjectParent(mat,a->A); 1643 ierr = 

#undef __FUNC__
#define __FUNC__ "MatConvertSameType_MPIAIJ"
static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIAIJ;
  mat->view      = MatView_MPIAIJ;
  mat->factor    = matin->factor;
  mat->assembled = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->insertmode   = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int)); CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  return 0;
}
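
/*
   Illustrative sketch (not part of this file): the copy routine above is
   what a call such as the following eventually exercises for an assembled
   MPIAIJ matrix, producing a second matrix with identical layout and values.

      Mat Acopy;
      ierr = MatConvert(A,MATSAME,&Acopy); CHKERRQ(ierr);
      ...
      ierr = MatDestroy(Acopy); CHKERRQ(ierr);
*/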

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIAIJ"
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  int        i, nz, ierr, j, rstart, rend, fd;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m       = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j<rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
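
/*
   Usage sketch for MatLoad_MPIAIJ() (illustrative only; it is normally
   reached through MatLoad() on a binary viewer, and the viewer routine
   names below are assumptions based on this PETSc release, not verified
   against it):

      Viewer viewer;
      Mat    A;
      ierr = ViewerFileOpenBinary(MPI_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer); CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPIAIJ,&A); CHKERRQ(ierr);
      ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
*/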