#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.191 1997/02/22 02:25:15 bsmith Exp bsmith $";
#endif

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIAIJ_Private" /* ADIC Ignore */
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i;

  aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  return 0;
}

extern int DisAssemble_MPIAIJ(Mat);

#undef __FUNC__
#define __FUNC__ "MatGetRowIJ_MPIAIJ"
static int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
                              PetscTruth *done)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
  } else SETERRQ(1,0,"not supported in parallel");
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRowIJ_MPIAIJ" /* ADIC Ignore */
static int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
                                  PetscTruth *done)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
  } else SETERRQ(1,0,"not supported in parallel");
  return 0;
}

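/*
   Illustration (not part of the library source): how the colmap built above
   is consulted elsewhere in this file.  garray[] lists, in increasing order,
   the global column numbers that occur in the off-diagonal block B; colmap[]
   is the inverse map, shifted by one so that an entry of 0 means "this global
   column does not occur locally":

       int lcol = aij->colmap[gcol] - 1;
       if (lcol < 0)  ... gcol has no entry in the local B block ...
       else           ... lcol is the column of gcol within B, and
                          aij->garray[lcol] == gcol ...

   This is only a sketch of the convention used by MatSetValues_MPIAIJ and
   MatAssemblyEnd_MPIAIJ below; gcol is a hypothetical global column index.
*/
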
#define CHUNKSIZE   15
#define MatSetValues_SeqAIJ_A_Private(A,row,col,value,addv) \
{ \
 \
  rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
  rmax = imax[row]; nrow = ailen[row]; \
  col1 = col - shift; \
 \
  for ( _i=0; _i<nrow; _i++ ) { \
    if (rp[_i] > col1) break; \
    if (rp[_i] == col1) { \
      if (addv == ADD_VALUES) ap[_i] += value; \
      else                    ap[_i] = value; \
      goto _noinsert; \
    } \
  } \
  if (nonew) goto _noinsert; \
  if (nrow >= rmax) { \
    /* there is no extra room in row, therefore enlarge */ \
    int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
    Scalar *new_a; \
 \
    /* malloc new storage space */ \
    len   = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
    new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
    new_j = (int *) (new_a + new_nz); \
    new_i = new_j + new_nz; \
 \
    /* copy over old data into new slots */ \
    for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
    for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
    PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
    len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
    PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                len*sizeof(int)); \
    PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
    PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                len*sizeof(Scalar)); \
    /* free up old matrix storage */ \
 \
    PetscFree(a->a); \
    if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
    aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
    a->singlemalloc = 1; \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = imax[row] = imax[row] + CHUNKSIZE; \
    PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
    a->maxnz += CHUNKSIZE; \
    a->reallocs++; \
  } \
  N = nrow++ - 1; a->nz++; \
  /* shift up all the later entries in this row */ \
  for ( ii=N; ii>=_i; ii-- ) { \
    rp[ii+1] = rp[ii]; \
    ap[ii+1] = ap[ii]; \
  } \
  rp[_i] = col1; \
  ap[_i] = value; \
  _noinsert: ; \
  ailen[row] = nrow; \
}

extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIAIJ"
static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
  int        *rp,ii,nrow,_i,rmax, N, col1;
  int        *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  int        *aj = a->j, nonew = a->nonew,shift = a->indexshift;
  Scalar     *ap, *aa = a->a;

  for ( i=0; i<m; i++ ) {
#if defined(PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(1,0,"Negative row");
    if (im[i] >= aij->M) SETERRQ(1,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_A_Private(aij->A,row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
#if defined(PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
        else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
            col = aij->colmap[in[j]] - 1;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = in[j];
            }
          }
          else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      if (roworiented && !aij->donotstash) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      }
      else {
        if (!aij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  return 0;
}

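/*
   Sketch (user level, not part of this file's implementation): entries are
   always set with global row/column indices; the routine above decides where
   each one lives.  Assuming a matrix created with MatCreateMPIAIJ(),

       int    row = ..., col = ...;      (global indices)
       Scalar val = ...;
       ierr = MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES); CHKERRQ(ierr);

   - locally owned row, column inside [cstart,cend)  -> diagonal block aij->A
   - locally owned row, column outside that range    -> off-diagonal block aij->B
     (translated through colmap/garray once the matrix has been assembled)
   - row owned by another processor                  -> stashed, and shipped
     during MatAssemblyBegin/End.
*/
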
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIAIJ"
static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
        else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[idxn[j]] - 1;
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    }
    else {
      SETERRQ(1,0,"Only local values currently supported");
    }
  }
  return 0;
}

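/*
   Note on the assembly communication that follows (explanatory sketch only,
   no additional functionality): every stashed off-processor entry is shipped
   as a triple of Scalars,

       svalues[3*k]   = (Scalar) global row index
       svalues[3*k+1] = (Scalar) global column index
       svalues[3*k+2] = value

   so a message carrying nz entries is simply 3*nz MPIU_SCALARs.  Encoding the
   integer indices as Scalars keeps every message a single homogeneous buffer.
*/
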
#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIAIJ"
static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered triples
          (global row, global column, value); the indices are stored as
          Scalars to simplify the message passing.
       2) since we don't know how long each individual message is we
          allocate the largest needed buffer for each receive. Potentially
          this is a lot of wasted space.

     This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
extern int MatSetUpMultiply_MPIAIJ(Mat);

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIAIJ"
static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = mat->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (mat->was_assembled || mat->assembled) {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[col] - 1;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIAIJ"
static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIAIJ"
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatMult_MPIAIJ"
static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr,nt;

  ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr);
  if (nt != a->n) {
    SETERRQ(1,0,"Incompatible partition of A and xx");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}

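/*
   The structure above (and in the variants that follow) is the standard
   communication/computation overlap for y = A x with a row-distributed
   matrix; in the notation of this file,

       y_local = A_diag * x_local + B_offdiag * x_ghost

   where x_ghost (a->lvec) is gathered from other processors by the
   VecScatter a->Mvctx.  The scatter is started, the diagonal-block product
   is computed while the messages are in flight, and the off-diagonal
   contribution is added once the scatter completes.  (Explanatory sketch
   only; the identifiers are those used in MatMult_MPIAIJ above.)
*/
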
#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIAIJ"
static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIAIJ"
static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in yy until the next line, which is true for my implementation
     but may not always be true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIAIJ"
static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which is true for my implementation
     but may not always be true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIAIJ"
static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  if (a->M != a->N)
    SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(1,0,"row partition must equal col partition");  }
  return MatGetDiagonal(a->A,v);
}

#undef __FUNC__
#define __FUNC__ "MatScale_MPIAIJ"
static int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIAIJ" /* ADIC Ignore */
static int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}
#include "draw.h"
#include "pinclude/pviewer.h"

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_Binary" /* ADIC Ignore */
static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,0,"Only uniprocessor output supported");
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" /* ADIC Ignore */
static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ  *C = (Mat_SeqAIJ*)aij->A->data;
  int         ierr, format,shift = C->indexshift,rank;
  FILE        *fd;
  ViewerType  vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerGetFormat(viewer,&format);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      int     flg;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
      PetscSequentialPhaseBegin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
              rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
              rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
      return 0;
    }
    else if (format == VIEWER_FORMAT_ASCII_INFO) {
      return 0;
    }
  }

  if (vtype == DRAW_VIEWER) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
  }

  if (vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
            aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  }
  else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat         A;
      Mat_SeqAIJ  *Aloc;
      int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar      *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ" /* ADIC Ignore */
static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat         mat = (Mat) obj;
  int         ierr;
  ViewerType  vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    return MatView_MPIAIJ_Binary(mat,viewer);
  }
  return 0;
}

/*
    This has to provide several versions.

     2) a) use only local smoothing, updating outer values only once.
        b) local smoothing, updating outer values each inner iteration
     3) color, updating outer values between colors.
*/
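/*
   The local relaxation below applies the classical SOR update with the
   off-processor part of x held fixed in the ghost vector lvec for the whole
   sweep.  For row i (an explanatory sketch in the notation of the code,
   assuming SPARSEDENSEMDOT(sum,x,v,idx,n) subtracts the sparse dot product
   of v with x from sum):

       sum = b_i - sum_j A_ij x_j - sum_k B_ik lvec_k     (local j, ghost k)
       x_i = (1 - omega) x_i + omega (sum + A_ii x_i) / (A_ii + fshift)

   which reduces to the usual x_i = (1-omega) x_i
   + (omega/A_ii) (b_i - sum_{j != i} A_ij x_j) when fshift is zero.
*/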
#undef __FUNC__
#define __FUNC__ "MatRelax_MPIAIJ"
static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                           double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     *b,*x,*xs,*ls,d,*v,sum;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else {
    SETERRQ(1,0,"Option not supported");
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIAIJ" /* ADIC Ignore */
static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr;
  double     isend[5], irecv[5];

  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  return 0;
}

extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIAIJ" /* ADIC Ignore */
static int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_UNSORTED ||
      op == MAT_COLUMNS_SORTED) {
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS)
    PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS)
    {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
  else
    {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIAIJ" /* ADIC Ignore */
static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIAIJ" /* ADIC Ignore */
static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->m; *n = mat->N;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIAIJ" /* ADIC Ignore */
static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int        max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows")
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    }
    else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIAIJ" /* ADIC Ignore */
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(1,0,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIAIJ"
static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,0,"No support for two norm");
    }
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIAIJ"
static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  Mat        B;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  if (matout == PETSC_NULL && M != N)
    SETERRQ(1,0,"Square matrix only for in-place");
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
                         PETSC_NULL,&B); CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose, but we free the data
       structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIAIJ"
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat        a = aij->A, b = aij->B;
  int        ierr,s1,s2,s3;

  ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
  if (rr) {
    s3 = aij->n;
    VecGetLocalSize_Fast(rr,s1);
    if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
  }
  if (ll) {
    VecGetLocalSize_Fast(ll,s1);
    if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
    ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
    ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
  }

  return 0;
}


extern int MatPrintHelp_SeqAIJ(Mat);
#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIAIJ" /* ADIC Ignore */
static int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;

  if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
  else return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIAIJ" /* ADIC Ignore */
static int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  *bs = 1;
  return 0;
}
#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIAIJ" /* ADIC Ignore */
static int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
  int        ierr;
  ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
  return 0;
}

static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,
       0,0,
       0,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       0,0,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       0,0,
       0,0,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,0,0,0,
       MatGetBlockSize_MPIAIJ,0,0,0,0,
       MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ};


#undef __FUNC__
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
          (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           diagonal portion of the local submatrix (possibly different for each row)
           or PETSC_NULL. You must leave room for the diagonal entry even if
           it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of local
          submatrix (same for all local rows).
.  o_nnz - array containing the number of nonzeros in the various rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode  - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit.
$        (max limit=5)
$    -mat_aij_oneindex - Internally use indexing starting at 1
$        rather than 0.  Note: When calling MatSetValues(),
$        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
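
   Example usage (a sketch only; the global size 100 and the preallocation
   figures 5 and 2 below are made up for illustration and should be chosen
   for the actual application):
$      Mat    A;
$      int    row, col;                      (global indices)
$      Scalar val;
$      ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,100,100,
$                             5,PETSC_NULL,2,PETSC_NULL,&A); CHKERRQ(ierr);
$      ierr = MatSetValues(A,1,&row,1,&col,&val,INSERT_VALUES); CHKERRQ(ierr);
$      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
$      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);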

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

$            0 1 2 3 4 5 6 7 8 9 10 11
$           -------------------
$    row 3  | o o o d d d o o o o o o
$    row 4  | o o o d d d o o o o o o
$    row 5  | o o o d d d o o o o o o
$           -------------------
$

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIAIJ   *b;
  int          ierr, i,sum[2],work[2],size;

  *A = 0;
  PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(B);
  B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
  MPI_Comm_size(comm,&size);
  if (size == 1) {
    B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
    B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
    B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
    B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
    B->ops.lufactor          = MatLUFactor_MPIAIJ;
    B->ops.solve             = MatSolve_MPIAIJ;
    B->ops.solveadd          = MatSolveAdd_MPIAIJ;
    B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
    B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
    B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
  }
  B->destroy    = MatDestroy_MPIAIJ;
  B->view       = MatView_MPIAIJ;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;
  B->mapping    = 0;

  B->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
    SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  *A = B;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatConvertSameType_MPIAIJ"
static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy    = MatDestroy_MPIAIJ;
  mat->view       = MatView_MPIAIJ;
  mat->factor     = matin->factor;
  mat->assembled  = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  return 0;
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIAIJ"
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  int        i, nz, ierr, j,rstart, rend, fd;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}