#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.194 1997/03/13 16:33:58 curfman Exp bsmith $";
#endif

#include "pinclude/pviewer.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/* local utility routine that creates a mapping from the global column 
   number to the local number in the off-diagonal part of the local 
   storage of the matrix.  This is done in a non-scalable way, since the 
   length of colmap equals the global matrix length. 
*/
#undef __FUNC__  
#define __FUNC__ "CreateColmap_MPIAIJ_Private" /* ADIC Ignore */
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i;

  aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int)); CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  return 0;
}
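/*
   A small worked example of the colmap convention above (the numbers are
   hypothetical, for illustration only).  If this process's off-diagonal
   block B has local columns whose global numbers are

      garray = {2, 7, 9}

   then after CreateColmap_MPIAIJ_Private() we have

      colmap[2] = 1, colmap[7] = 2, colmap[9] = 3,

   and colmap[] is zero everywhere else.  The stored values are the local
   column numbers PLUS ONE, so that a lookup of

      col = aij->colmap[global_col] - 1;

   yields col < 0 exactly when global_col is not present in B.
*/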
extern int DisAssemble_MPIAIJ(Mat);

#undef __FUNC__  
#define __FUNC__ "MatGetRowIJ_MPIAIJ"
int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
                       PetscTruth *done)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  if (aij->size == 1) {
    ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
  } else SETERRQ(1,0,"not supported in parallel");
  return 0;
}

#undef __FUNC__  
#define __FUNC__ "MatRestoreRowIJ_MPIAIJ" /* ADIC Ignore */
int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
                           PetscTruth *done)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  if (aij->size == 1) {
    ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
  } else SETERRQ(1,0,"not supported in parallel");
  return 0;
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(A,row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = imax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
    \
    for ( _i=0; _i<nrow; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto _noinsert; \
      } \
    } \
    if (nonew) goto _noinsert; \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
      new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j = (int *) (new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
      for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                  len*sizeof(int)); \
      PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
      PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                  len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
 \
      PetscFree(a->a); \
      if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = 1; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = imax[row] = imax[row] + CHUNKSIZE; \
      PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    _noinsert: ; \
    ailen[row] = nrow; \
}
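/*
   The insertion macro above is easier to follow in plain-function form.
   The sketch below (compiled out; illustrative only, and the name
   insert_in_sorted_row() is hypothetical, not part of the library) shows
   the same technique: scan the sorted row, combine in place on a match,
   otherwise shift the tail up one slot to make room; when the row is
   full the macro instead reallocates the whole matrix with CHUNKSIZE
   extra entries for this row.
*/
#if 0
static int insert_in_sorted_row(int *cols,Scalar *vals,int *nrow,int rmax,
                                int col,Scalar value,InsertMode addv)
{
  int i,k;

  for ( i=0; i<*nrow; i++ ) {
    if (cols[i] > col) break;
    if (cols[i] == col) {            /* already present: combine */
      if (addv == ADD_VALUES) vals[i] += value;
      else                    vals[i] = value;
      return 0;
    }
  }
  if (*nrow >= rmax) return 1;       /* no room: caller must enlarge row */
  for ( k=*nrow; k>i; k-- ) {        /* open a slot at position i */
    cols[k] = cols[k-1]; vals[k] = vals[k-1];
  }
  cols[i] = col; vals[i] = value; (*nrow)++;
  return 0;
}
#endif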
extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);

#undef __FUNC__  
#define __FUNC__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

  /* Some variables required by the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
  int        *rp,ii,nrow,_i,rmax, N, col1;
  int        *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  int        *aj = a->j, nonew = a->nonew,shift = a->indexshift;
  Scalar     *ap, *aa = a->a;

  for ( i=0; i<m; i++ ) {
#if defined(PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(1,0,"Negative row");
    if (im[i] >= aij->M) SETERRQ(1,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_A_Private(aij->A,row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv); CHKERRQ(ierr); */
        }
#if defined(PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
        else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
            }
            col = aij->colmap[in[j]] - 1;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = in[j];
            }
          }
          else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv); CHKERRQ(ierr);
        }
      }
    }
    else {
      if (roworiented && !aij->donotstash) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv); CHKERRQ(ierr);
      }
      else {
        if (!aij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv); CHKERRQ(ierr);
          }
        }
      }
    }
  }
  return 0;
}

#undef __FUNC__  
#define __FUNC__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
        else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
          }
          col = aij->colmap[idxn[j]] - 1;
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    }
    else {
      SETERRQ(1,0,"Only local values currently supported");
    }
  }
  return 0;
}

#undef __FUNC__  
#define __FUNC__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: 
       1) each message will consist of ordered triples 
          (global row index, global column index, value); the indices are 
          stored as Scalars to simplify the message passing. 
       2) since we don't know how long each individual message is we 
          allocate the largest needed buffer for each receive. Potentially 
          this is a lot of wasted space.

       This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to 
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar)); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
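/*
   For concreteness, a hypothetical packed message as sent above: if this
   process stashed the entries (row 7, col 2, 3.5) and (row 7, col 9, -1.0)
   for one owner, the corresponding stretch of svalues is

      { 7.0, 2.0, 3.5,  7.0, 9.0, -1.0 }

   i.e. 3 Scalars per entry, with the integer row and column indices
   converted to Scalars.  MatAssemblyEnd_MPIAIJ() converts them back with
   (int) PetscReal(...).
*/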
extern int MatSetUpMultiply_MPIAIJ(Mat);

#undef __FUNC__  
#define __FUNC__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = mat->insertmode;

  /* wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (mat->was_assembled || mat->assembled) {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
          }
          col = aij->colmap[col] - 1;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status)); CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must 
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  return 0;
}
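/*
   A worked instance of the disassembly check above (rank values are
   hypothetical): if was_assembled is 1 on ranks 0 and 1 but 0 on rank 2,
   the MPI_PROD reduction yields other_disassembled = 1*1*0 = 0, so ranks
   0 and 1 see (was_assembled && !other_disassembled) and disassemble as
   well.  Only when every process still has was_assembled = 1 does the
   product stay 1 and no one disassembles.
*/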
#undef __FUNC__  
#define __FUNC__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;

  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* the code does not do the diagonal entries correctly unless the 
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access 
   aij->A and aij->B directly and not through the MatZeroRows() 
   routine. 
*/
#undef __FUNC__  
#define __FUNC__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to 
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp); CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}
#undef __FUNC__  
#define __FUNC__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr,nt;

  ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr);
  if (nt != a->n) {
    SETERRQ(1,0,"Incompatible partition of A and xx");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__  
#define __FUNC__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__  
#define __FUNC__ "MatMultTrans_MPIAIJ"
int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted into yy until the VecScatterEnd() below, which holds for   */
  /* this implementation but perhaps not always.                         */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__  
#define __FUNC__ "MatMultTransAdd_MPIAIJ"
int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted into zz until the VecScatterEnd() below, which holds for   */
  /* this implementation but perhaps not always.                         */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}
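/*
   The pattern shared by the four routines above: with this process's rows
   split as [A | B], where A holds the locally owned columns and B the
   rest, the forward product is

      y_local = A*x_local + B*lvec,    lvec = gathered ghost values of x,

   and the transpose product reverses the communication, scattering
   B'*x_local back to the owning processes with ADD_VALUES.  In both
   directions the scatter is begun before the purely local product so that
   communication can overlap computation.
*/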
/*
  This only works correctly for square matrices where the subblock A->A is 
  the diagonal block
*/
#undef __FUNC__  
#define __FUNC__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  if (a->M != a->N)
    SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(1,0,"row partition must equal col partition");
  }
  return MatGetDiagonal(a->A,v);
}

#undef __FUNC__  
#define __FUNC__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__  
#define __FUNC__ "MatDestroy_MPIAIJ" /* ADIC Ignore */
int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}

#undef __FUNC__  
#define __FUNC__ "MatView_MPIAIJ_Binary" /* ADIC Ignore */
extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,0,"Only uniprocessor output supported");
  return 0;
}
#undef __FUNC__  
#define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" /* ADIC Ignore */
extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*)aij->A->data;
  int        ierr, format,shift = C->indexshift,rank;
  FILE       *fd;
  ViewerType vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerGetFormat(viewer,&format);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      int     flg;

      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
      PetscSequentialPhaseBegin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                       rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                   rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
      return 0;
    }
    else if (format == VIEWER_FORMAT_ASCII_INFO) {
      return 0;
    }
  }

  if (vtype == DRAW_VIEWER) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
  }

  if (vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
            aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  }
  else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat        A;
      Mat_SeqAIJ *Aloc;
      int        M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar     *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES); CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES); CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}
#undef __FUNC__  
#define __FUNC__ "MatView_MPIAIJ" /* ADIC Ignore */
int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat        mat = (Mat) obj;
  int        ierr;
  ViewerType vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    return MatView_MPIAIJ_Binary(mat,viewer);
  }
  return 0;
}

/*
    This has to provide several versions.

     2) a) use only local smoothing, updating outer values only once.
        b) local smoothing, updating outer values each inner iteration
     3) color, updating outer values between colors.
*/
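/*
   The update computed in each sweep below is the standard SOR step

      x_i <- (1-omega)*x_i + omega*(b_i - sum_{j!=i} a_ij*x_j)/(fshift + a_ii),

   with the off-process terms of the row sum taken from the ghost values
   gathered into lvec.  (This reading assumes SPARSEDENSEMDOT(sum,...)
   subtracts the sparse dot product from sum; the diagonal term it also
   removes is added back explicitly as A->a[diag[i]+shift]*x[i].)
*/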
#undef __FUNC__  
#define __FUNC__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                    double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     *b,*x,*xs,*ls,d,*v,sum;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else {
    SETERRQ(1,0,"Option not supported");
  }
  return 0;
}
#undef __FUNC__  
#define __FUNC__ "MatGetInfo_MPIAIJ" /* ADIC Ignore */
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr;
  double     isend[5], irecv[5];

  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  return 0;
}

extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);

#undef __FUNC__  
#define __FUNC__ "MatSetOption_MPIAIJ" /* ADIC Ignore */
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_UNSORTED ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_NEW_NONZERO_LOCATION_ERROR) {
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS)
    PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS)
    {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
  else
    {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
  return 0;
}
#undef __FUNC__  
#define __FUNC__ "MatGetSize_MPIAIJ" /* ADIC Ignore */
int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  *m = mat->M; *n = mat->N;
  return 0;
}

#undef __FUNC__  
#define __FUNC__ "MatGetLocalSize_MPIAIJ" /* ADIC Ignore */
int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  *m = mat->m; *n = mat->N;
  return 0;
}

#undef __FUNC__  
#define __FUNC__ "MatGetOwnershipRange_MPIAIJ" /* ADIC Ignore */
int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  *m = mat->rstart; *n = mat->rend;
  return 0;
}

extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
#undef __FUNC__  
#define __FUNC__ "MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int        max = 1,m = mat->m,tmp;

    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    }
    else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__  
#define __FUNC__ "MatRestoreRow_MPIAIJ" /* ADIC Ignore */
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;

  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(1,0,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  return 0;
}
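/*
   A hypothetical example of the three-piece merge in MatGetRow_MPIAIJ():
   suppose cstart = 4, the diagonal block contributes local columns
   {0, 2} (global {4, 6}) and the off-diagonal block contributes global
   columns {1, 3, 9} (through cmap).  The B columns split at cstart into
   {1, 3} and {9}, so the assembled row comes out in increasing global
   column order:

      idx = { 1, 3,   4, 6,   9 }
             B-left    A     B-right

   This relies on both blocks storing their columns sorted.
*/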
#undef __FUNC__  
#define __FUNC__ "MatNorm_MPIAIJ"
int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,0,"No support for two norm");
    }
  }
  return 0;
}

#undef __FUNC__  
#define __FUNC__ "MatTranspose_MPIAIJ"
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  Mat        B;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  if (matout == PETSC_NULL && M != N)
    SETERRQ(1,0,"Square matrix only for in-place");
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
                         PETSC_NULL,&B); CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES); CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES); CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose; we free the data 
       structures from a and take over those of B */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}
#undef __FUNC__  
#define __FUNC__ "MatDiagonalScale_MPIAIJ"
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat        a = aij->A, b = aij->B;
  int        ierr,s1,s2,s3;

  ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
  if (rr) {
    s3 = aij->n;
    VecGetLocalSize_Fast(rr,s1);
    if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
  }
  if (ll) {
    VecGetLocalSize_Fast(ll,s1);
    if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
    ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
    ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
  }

  return 0;
}

extern int MatPrintHelp_SeqAIJ(Mat);
#undef __FUNC__  
#define __FUNC__ "MatPrintHelp_MPIAIJ" /* ADIC Ignore */
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;

  if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
  else return 0;
}

#undef __FUNC__  
#define __FUNC__ "MatGetBlockSize_MPIAIJ" /* ADIC Ignore */
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  *bs = 1;
  return 0;
}

#undef __FUNC__  
#define __FUNC__ "MatSetUnfactored_MPIAIJ" /* ADIC Ignore */
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
  int        ierr;

  ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
  return 0;
}

extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,
       0,0,
       0,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       0,0,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       0,0,
       0,0,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,0,0,0,
       MatGetBlockSize_MPIAIJ,0,0,0,0,
       MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ};
#undef __FUNC__  
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
           This value should be the same as the local size used in creating the 
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
           This value should be the same as the local size used in creating the 
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
           (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the 
           diagonal portion of the local submatrix (possibly different for each row)
           or PETSC_NULL.  You must leave room for the diagonal entry even if 
           it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of local
           submatrix (same for all local rows).
.  o_nnz - array containing the number of nonzeros in the various rows of the 
           off-diagonal portion of the local submatrix (possibly different for 
           each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix 

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode  - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit.
$        (max limit=5)
$    -mat_aij_oneindex - Internally use indexing starting at 1
$        rather than 0.  Note: When calling MatSetValues(),
$        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion 
   to be its local rows and the corresponding columns (a square submatrix);  
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix). 

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set 
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

$            0 1 2 3 4 5 6 7 8 9 10 11
$           -------------------
$    row 3  |  o o o d d d o o o o o o
$    row 4  |  o o o d d d o o o o o o
$    row 5  |  o o o d d d o o o o o o
$           -------------------
$ 

   Thus, any entries in the d locations are stored in the d (diagonal) 
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr, i,sum[2],work[2],size;

  *A = 0;
  PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(B);
  B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
  MPI_Comm_size(comm,&size);
  if (size == 1) {
    B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
    B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
    B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
    B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
    B->ops.lufactor          = MatLUFactor_MPIAIJ;
    B->ops.solve             = MatSolve_MPIAIJ;
    B->ops.solveadd          = MatSolveAdd_MPIAIJ;
    B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
    B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
    B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
  }
  B->destroy    = MatDestroy_MPIAIJ;
  B->view       = MatView_MPIAIJ;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;
  B->mapping    = 0;

  B->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
    SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  *A = B;
  return 0;
}
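/*
   A minimal usage sketch (illustrative only; the sizes are made up):
   create a 100 x 100 matrix spread across MPI_COMM_WORLD, preallocating
   roughly 5 nonzeros per row in the diagonal block and 2 per row in the
   off-diagonal block, then set entries and assemble.

      Mat A;
      ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
                             100,100,5,PETSC_NULL,2,PETSC_NULL,&A); CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,1,&col,&value,INSERT_VALUES); CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
*/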
#undef __FUNC__  
#define __FUNC__ "MatConvertSameType_MPIAIJ"
int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIAIJ;
  mat->view      = MatView_MPIAIJ;
  mat->factor    = matin->factor;
  mat->assembled = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int)); CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  return 0;
}
#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIAIJ"
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  int        i, nz, ierr, j, rstart, rend, fd;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not a matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m       = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  } else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j<rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with the file");
  }
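  /*
     Storage-scheme note (added commentary): each local row of a MATMPIAIJ
     matrix is split into a "diagonal" block (columns in [rstart,rend),
     held in aij->A) and an "off-diagonal" block (all remaining columns,
     held in aij->B).  The pass below counts the off-diagonal entries of
     every local row so that both sequential blocks can be preallocated
     exactly by MatCreateMPIAIJ().
  */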
  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix; the diagonal-block row lengths are the totals minus
     the off-diagonal counts, so temporarily subtract then restore */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  } else {
    /* allocate buffer space and receive the numerical values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with the file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
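/*
    Usage sketch (added commentary, illustrative only): MatLoad_MPIAIJ() is
    normally reached through MatLoad() on a binary viewer holding a matrix
    previously written with MatView().  The file name "matrix.dat" is
    hypothetical, and the availability of ViewerFileOpenBinary() with the
    BINARY_RDONLY mode is assumed for this version of PETSc:

        Viewer viewer;
        Mat    A;

        ierr = ViewerFileOpenBinary(PETSC_COMM_WORLD,"matrix.dat",
                                    BINARY_RDONLY,&viewer); CHKERRQ(ierr);
        ierr = MatLoad(viewer,MATMPIAIJ,&A); CHKERRQ(ierr);
        ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
*/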