1 #ifndef lint 2 static char vcid[] = "$Id: mpiaij.c,v 1.192 1997/03/09 17:58:24 bsmith Exp bsmith $"; 3 #endif 4 5 #include "src/mat/impls/aij/mpi/mpiaij.h" 6 #include "src/vec/vecimpl.h" 7 #include "src/inline/spops.h" 8 9 /* local utility routine that creates a mapping from the global column 10 number to the local number in the off-diagonal part of the local 11 storage of the matrix. This is done in a non scable way since the 12 length of colmap equals the global matrix length. 13 */ 14 #undef __FUNC__ 15 #define __FUNC__ "CreateColmap_MPIAIJ_Private" /* ADIC Ignore */ 16 int CreateColmap_MPIAIJ_Private(Mat mat) 17 { 18 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 19 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 20 int n = B->n,i; 21 22 aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 23 PLogObjectMemory(mat,aij->N*sizeof(int)); 24 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 25 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 26 return 0; 27 } 28 29 extern int DisAssemble_MPIAIJ(Mat); 30 31 #undef __FUNC__ 32 #define __FUNC__ "MatGetRowIJ_MPIAIJ" 33 static int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 34 PetscTruth *done) 35 { 36 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 37 int ierr; 38 if (aij->size == 1) { 39 ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 40 } else SETERRQ(1,0,"not supported in parallel"); 41 return 0; 42 } 43 44 #undef __FUNC__ 45 #define __FUNC__ "MatRestoreRowIJ_MPIAIJ" /* ADIC Ignore */ 46 static int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 47 PetscTruth *done) 48 { 49 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 50 int ierr; 51 if (aij->size == 1) { 52 ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 53 } else SETERRQ(1,0,"not supported in parallel"); 54 return 0; 55 } 56 57 #define CHUNKSIZE 15 58 #define MatSetValues_SeqAIJ_A_Private(A,row,col,value,addv) \ 59 { \ 
60 \ 61 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 62 rmax = imax[row]; nrow = ailen[row]; \ 63 col1 = col - shift; \ 64 \ 65 for ( _i=0; _i<nrow; _i++ ) { \ 66 if (rp[_i] > col1) break; \ 67 if (rp[_i] == col1) { \ 68 if (addv == ADD_VALUES) ap[_i] += value; \ 69 else ap[_i] = value; \ 70 goto _noinsert; \ 71 } \ 72 } \ 73 if (nonew) goto _noinsert; \ 74 if (nrow >= rmax) { \ 75 /* there is no extra room in row, therefore enlarge */ \ 76 int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 77 Scalar *new_a; \ 78 \ 79 /* malloc new storage space */ \ 80 len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 81 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 82 new_j = (int *) (new_a + new_nz); \ 83 new_i = new_j + new_nz; \ 84 \ 85 /* copy over old data into new slots */ \ 86 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 87 for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 88 PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 89 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 90 PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 91 len*sizeof(int)); \ 92 PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 93 PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 94 len*sizeof(Scalar)); \ 95 /* free up old matrix storage */ \ 96 \ 97 PetscFree(a->a); \ 98 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 99 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 100 a->singlemalloc = 1; \ 101 \ 102 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 103 rmax = imax[row] = imax[row] + CHUNKSIZE; \ 104 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 105 a->maxnz += CHUNKSIZE; \ 106 a->reallocs++; \ 107 } \ 108 N = nrow++ - 1; a->nz++; \ 109 /* shift up all the later entries in this row */ \ 110 for ( ii=N; ii>=_i; ii-- ) { \ 111 rp[ii+1] = rp[ii]; \ 112 ap[ii+1] = ap[ii]; \ 113 } \ 114 rp[_i] = col1; \ 115 ap[_i] = 
value; \ 116 _noinsert: ; \ 117 ailen[row] = nrow; \ 118 } 119 120 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 121 #undef __FUNC__ 122 #define __FUNC__ "MatSetValues_MPIAIJ" 123 static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 124 { 125 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 126 Scalar value; 127 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 128 int cstart = aij->cstart, cend = aij->cend,row,col; 129 int roworiented = aij->roworiented; 130 131 /* Some Variables required in the macro */ 132 Mat A = aij->A; 133 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 134 int *rp,ii,nrow,_i,rmax, N, col1; 135 int *imax = a->imax, *ai = a->i, *ailen = a->ilen; 136 int *aj = a->j, nonew = a->nonew,shift = a->indexshift; 137 Scalar *ap, *aa = a->a; 138 139 for ( i=0; i<m; i++ ) { 140 #if defined(PETSC_BOPT_g) 141 if (im[i] < 0) SETERRQ(1,0,"Negative row"); 142 if (im[i] >= aij->M) SETERRQ(1,0,"Row too large"); 143 #endif 144 if (im[i] >= rstart && im[i] < rend) { 145 row = im[i] - rstart; 146 for ( j=0; j<n; j++ ) { 147 if (in[j] >= cstart && in[j] < cend){ 148 col = in[j] - cstart; 149 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 150 MatSetValues_SeqAIJ_A_Private(aij->A,row,col,value,addv); 151 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 152 } 153 #if defined(PETSC_BOPT_g) 154 else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 155 else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");} 156 #endif 157 else { 158 if (mat->was_assembled) { 159 if (!aij->colmap) { 160 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 161 } 162 col = aij->colmap[in[j]] - 1; 163 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 164 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 165 col = in[j]; 166 } 167 } 168 else col = in[j]; 169 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 170 ierr = 
MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 171 } 172 } 173 } 174 else { 175 if (roworiented && !aij->donotstash) { 176 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 177 } 178 else { 179 if (!aij->donotstash) { 180 row = im[i]; 181 for ( j=0; j<n; j++ ) { 182 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 183 } 184 } 185 } 186 } 187 } 188 return 0; 189 } 190 191 #undef __FUNC__ 192 #define __FUNC__ "MatGetValues_MPIAIJ" 193 static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 194 { 195 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 196 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 197 int cstart = aij->cstart, cend = aij->cend,row,col; 198 199 for ( i=0; i<m; i++ ) { 200 if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 201 if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large"); 202 if (idxm[i] >= rstart && idxm[i] < rend) { 203 row = idxm[i] - rstart; 204 for ( j=0; j<n; j++ ) { 205 if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 206 if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large"); 207 if (idxn[j] >= cstart && idxn[j] < cend){ 208 col = idxn[j] - cstart; 209 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 210 } 211 else { 212 if (!aij->colmap) { 213 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 214 } 215 col = aij->colmap[idxn[j]] - 1; 216 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 217 else { 218 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 219 } 220 } 221 } 222 } 223 else { 224 SETERRQ(1,0,"Only local values currently supported"); 225 } 226 } 227 return 0; 228 } 229 230 #undef __FUNC__ 231 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 232 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 233 { 234 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 235 MPI_Comm comm = mat->comm; 236 int size = aij->size, *owners = aij->rowners; 237 int rank = aij->rank,tag = mat->tag, 
*owner,*starts,count,ierr; 238 MPI_Request *send_waits,*recv_waits; 239 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 240 InsertMode addv; 241 Scalar *rvalues,*svalues; 242 243 /* make sure all processors are either in INSERTMODE or ADDMODE */ 244 MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 245 if (addv == (ADD_VALUES|INSERT_VALUES)) { 246 SETERRQ(1,0,"Some processors inserted others added"); 247 } 248 mat->insertmode = addv; /* in case this processor had no cache */ 249 250 /* first count number of contributors to each processor */ 251 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 252 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 253 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 254 for ( i=0; i<aij->stash.n; i++ ) { 255 idx = aij->stash.idx[i]; 256 for ( j=0; j<size; j++ ) { 257 if (idx >= owners[j] && idx < owners[j+1]) { 258 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 259 } 260 } 261 } 262 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 263 264 /* inform other processors of number of messages and max length*/ 265 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 266 MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 267 nreceives = work[rank]; 268 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 269 nmax = work[rank]; 270 PetscFree(work); 271 272 /* post receives: 273 1) each message will consist of ordered pairs 274 (global index,value) we store the global index as a double 275 to simplify the message passing. 276 2) since we don't know how long each individual message is we 277 allocate the largest needed buffer for each receive. Potentially 278 this is a lot of wasted space. 279 280 281 This could be done better. 
282 */ 283 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 284 CHKPTRQ(rvalues); 285 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 286 CHKPTRQ(recv_waits); 287 for ( i=0; i<nreceives; i++ ) { 288 MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 289 comm,recv_waits+i); 290 } 291 292 /* do sends: 293 1) starts[i] gives the starting index in svalues for stuff going to 294 the ith processor 295 */ 296 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 297 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 298 CHKPTRQ(send_waits); 299 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 300 starts[0] = 0; 301 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 302 for ( i=0; i<aij->stash.n; i++ ) { 303 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 304 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 305 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 306 } 307 PetscFree(owner); 308 starts[0] = 0; 309 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 310 count = 0; 311 for ( i=0; i<size; i++ ) { 312 if (procs[i]) { 313 MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 314 comm,send_waits+count++); 315 } 316 } 317 PetscFree(starts); PetscFree(nprocs); 318 319 /* Free cache space */ 320 PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 321 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 322 323 aij->svalues = svalues; aij->rvalues = rvalues; 324 aij->nsends = nsends; aij->nrecvs = nreceives; 325 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 326 aij->rmax = nmax; 327 328 return 0; 329 } 330 extern int MatSetUpMultiply_MPIAIJ(Mat); 331 332 #undef __FUNC__ 333 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 334 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 335 { 336 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ *) mat->data; 337 MPI_Status *send_status,recv_status; 338 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 339 int row,col,other_disassembled; 340 Scalar *values,val; 341 InsertMode addv = mat->insertmode; 342 343 /* wait on receives */ 344 while (count) { 345 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 346 /* unpack receives into our local space */ 347 values = aij->rvalues + 3*imdex*aij->rmax; 348 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 349 n = n/3; 350 for ( i=0; i<n; i++ ) { 351 row = (int) PetscReal(values[3*i]) - aij->rstart; 352 col = (int) PetscReal(values[3*i+1]); 353 val = values[3*i+2]; 354 if (col >= aij->cstart && col < aij->cend) { 355 col -= aij->cstart; 356 MatSetValues(aij->A,1,&row,1,&col,&val,addv); 357 } 358 else { 359 if (mat->was_assembled || mat->assembled) { 360 if (!aij->colmap) { 361 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 362 } 363 col = aij->colmap[col] - 1; 364 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 365 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 366 col = (int) PetscReal(values[3*i+1]); 367 } 368 } 369 MatSetValues(aij->B,1,&row,1,&col,&val,addv); 370 } 371 } 372 count--; 373 } 374 PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 375 376 /* wait on sends */ 377 if (aij->nsends) { 378 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 379 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 380 PetscFree(send_status); 381 } 382 PetscFree(aij->send_waits); PetscFree(aij->svalues); 383 384 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 385 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 386 387 /* determine if any processor has disassembled, if so we must 388 also disassemble ourselfs, in order that we may reassemble. 
*/ 389 MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 390 if (mat->was_assembled && !other_disassembled) { 391 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 392 } 393 394 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 395 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 396 } 397 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 398 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 399 400 if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 401 return 0; 402 } 403 404 #undef __FUNC__ 405 #define __FUNC__ "MatZeroEntries_MPIAIJ" 406 static int MatZeroEntries_MPIAIJ(Mat A) 407 { 408 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 409 int ierr; 410 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 411 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 412 return 0; 413 } 414 415 /* the code does not do the diagonal entries correctly unless the 416 matrix is square and the column and row owerships are identical. 417 This is a BUG. The only way to fix it seems to be to access 418 aij->A and aij->B directly and not through the MatZeroRows() 419 routine. 
420 */ 421 #undef __FUNC__ 422 #define __FUNC__ "MatZeroRows_MPIAIJ" 423 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 424 { 425 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 426 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 427 int *procs,*nprocs,j,found,idx,nsends,*work; 428 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 429 int *rvalues,tag = A->tag,count,base,slen,n,*source; 430 int *lens,imdex,*lrows,*values; 431 MPI_Comm comm = A->comm; 432 MPI_Request *send_waits,*recv_waits; 433 MPI_Status recv_status,*send_status; 434 IS istmp; 435 436 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 437 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 438 439 /* first count number of contributors to each processor */ 440 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 441 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 442 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 443 for ( i=0; i<N; i++ ) { 444 idx = rows[i]; 445 found = 0; 446 for ( j=0; j<size; j++ ) { 447 if (idx >= owners[j] && idx < owners[j+1]) { 448 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 449 } 450 } 451 if (!found) SETERRQ(1,0,"Index out of range"); 452 } 453 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 454 455 /* inform other processors of number of messages and max length*/ 456 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 457 MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 458 nrecvs = work[rank]; 459 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 460 nmax = work[rank]; 461 PetscFree(work); 462 463 /* post receives: */ 464 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 465 CHKPTRQ(rvalues); 466 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 467 CHKPTRQ(recv_waits); 468 for ( i=0; i<nrecvs; i++ ) { 469 MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 470 } 471 472 /* do sends: 
473 1) starts[i] gives the starting index in svalues for stuff going to 474 the ith processor 475 */ 476 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 477 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 478 CHKPTRQ(send_waits); 479 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 480 starts[0] = 0; 481 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 482 for ( i=0; i<N; i++ ) { 483 svalues[starts[owner[i]]++] = rows[i]; 484 } 485 ISRestoreIndices(is,&rows); 486 487 starts[0] = 0; 488 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 489 count = 0; 490 for ( i=0; i<size; i++ ) { 491 if (procs[i]) { 492 MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 493 } 494 } 495 PetscFree(starts); 496 497 base = owners[rank]; 498 499 /* wait on receives */ 500 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 501 source = lens + nrecvs; 502 count = nrecvs; slen = 0; 503 while (count) { 504 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 505 /* unpack receives into our local space */ 506 MPI_Get_count(&recv_status,MPI_INT,&n); 507 source[imdex] = recv_status.MPI_SOURCE; 508 lens[imdex] = n; 509 slen += n; 510 count--; 511 } 512 PetscFree(recv_waits); 513 514 /* move the data into the send scatter */ 515 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 516 count = 0; 517 for ( i=0; i<nrecvs; i++ ) { 518 values = rvalues + i*nmax; 519 for ( j=0; j<lens[i]; j++ ) { 520 lrows[count++] = values[j] - base; 521 } 522 } 523 PetscFree(rvalues); PetscFree(lens); 524 PetscFree(owner); PetscFree(nprocs); 525 526 /* actually zap the local rows */ 527 ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 528 PLogObjectParent(A,istmp); 529 PetscFree(lrows); 530 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 531 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 532 ierr = ISDestroy(istmp); 
CHKERRQ(ierr); 533 534 /* wait on sends */ 535 if (nsends) { 536 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 537 CHKPTRQ(send_status); 538 MPI_Waitall(nsends,send_waits,send_status); 539 PetscFree(send_status); 540 } 541 PetscFree(send_waits); PetscFree(svalues); 542 543 return 0; 544 } 545 546 #undef __FUNC__ 547 #define __FUNC__ "MatMult_MPIAIJ" 548 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 549 { 550 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 551 int ierr,nt; 552 553 ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 554 if (nt != a->n) { 555 SETERRQ(1,0,"Incompatible partition of A and xx"); 556 } 557 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 558 ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 559 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 560 ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 561 return 0; 562 } 563 564 #undef __FUNC__ 565 #define __FUNC__ "MatMultAdd_MPIAIJ" 566 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 567 { 568 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 569 int ierr; 570 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 571 ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 572 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 573 ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 574 return 0; 575 } 576 577 #undef __FUNC__ 578 #define __FUNC__ "MatMultTrans_MPIAIJ" 579 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 580 { 581 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 582 int ierr; 583 584 /* do nondiagonal part */ 585 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 586 /* send it on its way */ 587 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 588 /* do local part */ 589 ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 590 /* receive 
remote parts: note this assumes the values are not actually */ 591 /* inserted in yy until the next line, which is true for my implementation*/ 592 /* but is not perhaps always true. */ 593 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 594 return 0; 595 } 596 597 #undef __FUNC__ 598 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 599 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 600 { 601 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 602 int ierr; 603 604 /* do nondiagonal part */ 605 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 606 /* send it on its way */ 607 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 608 /* do local part */ 609 ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 610 /* receive remote parts: note this assumes the values are not actually */ 611 /* inserted in yy until the next line, which is true for my implementation*/ 612 /* but is not perhaps always true. 
*/ 613 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 614 return 0; 615 } 616 617 /* 618 This only works correctly for square matrices where the subblock A->A is the 619 diagonal block 620 */ 621 #undef __FUNC__ 622 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 623 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 624 { 625 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 626 if (a->M != a->N) 627 SETERRQ(1,0,"Supports only square matrix where A->A is diag block"); 628 if (a->rstart != a->cstart || a->rend != a->cend) { 629 SETERRQ(1,0,"row partition must equal col partition"); } 630 return MatGetDiagonal(a->A,v); 631 } 632 633 #undef __FUNC__ 634 #define __FUNC__ "MatScale_MPIAIJ" 635 static int MatScale_MPIAIJ(Scalar *aa,Mat A) 636 { 637 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 638 int ierr; 639 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 640 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 641 return 0; 642 } 643 644 #undef __FUNC__ 645 #define __FUNC__ "MatDestroy_MPIAIJ" /* ADIC Ignore */ 646 static int MatDestroy_MPIAIJ(PetscObject obj) 647 { 648 Mat mat = (Mat) obj; 649 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 650 int ierr; 651 #if defined(PETSC_LOG) 652 PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 653 #endif 654 PetscFree(aij->rowners); 655 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 656 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 657 if (aij->colmap) PetscFree(aij->colmap); 658 if (aij->garray) PetscFree(aij->garray); 659 if (aij->lvec) VecDestroy(aij->lvec); 660 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 661 if (aij->rowvalues) PetscFree(aij->rowvalues); 662 PetscFree(aij); 663 if (mat->mapping) { 664 ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 665 } 666 PLogObjectDestroy(mat); 667 PetscHeaderDestroy(mat); 668 return 0; 669 } 670 #include "draw.h" 671 #include "pinclude/pviewer.h" 672 673 #undef __FUNC__ 674 #define __FUNC__ "MatView_MPIAIJ_Binary" /* ADIC Ignore */ 675 static int MatView_MPIAIJ_Binary(Mat 
mat,Viewer viewer) 676 { 677 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 678 int ierr; 679 680 if (aij->size == 1) { 681 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 682 } 683 else SETERRQ(1,0,"Only uniprocessor output supported"); 684 return 0; 685 } 686 687 #undef __FUNC__ 688 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" /* ADIC Ignore */ 689 static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 690 { 691 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 692 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 693 int ierr, format,shift = C->indexshift,rank; 694 FILE *fd; 695 ViewerType vtype; 696 697 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 698 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 699 ierr = ViewerGetFormat(viewer,&format); 700 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 701 MatInfo info; 702 int flg; 703 MPI_Comm_rank(mat->comm,&rank); 704 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 705 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 706 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 707 PetscSequentialPhaseBegin(mat->comm,1); 708 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 709 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 710 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 711 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 712 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 713 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 714 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 715 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 716 fflush(fd); 717 PetscSequentialPhaseEnd(mat->comm,1); 718 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 719 return 0; 720 } 721 else if (format == VIEWER_FORMAT_ASCII_INFO) { 722 return 0; 723 } 724 } 725 726 if (vtype == DRAW_VIEWER) { 727 Draw draw; 728 
PetscTruth isnull; 729 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 730 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 731 } 732 733 if (vtype == ASCII_FILE_VIEWER) { 734 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 735 PetscSequentialPhaseBegin(mat->comm,1); 736 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 737 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 738 aij->cend); 739 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 740 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 741 fflush(fd); 742 PetscSequentialPhaseEnd(mat->comm,1); 743 } 744 else { 745 int size = aij->size; 746 rank = aij->rank; 747 if (size == 1) { 748 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 749 } 750 else { 751 /* assemble the entire matrix onto first processor. */ 752 Mat A; 753 Mat_SeqAIJ *Aloc; 754 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 755 Scalar *a; 756 757 if (!rank) { 758 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 759 CHKERRQ(ierr); 760 } 761 else { 762 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 763 CHKERRQ(ierr); 764 } 765 PLogObjectParent(mat,A); 766 767 /* copy over the A part */ 768 Aloc = (Mat_SeqAIJ*) aij->A->data; 769 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 770 row = aij->rstart; 771 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 772 for ( i=0; i<m; i++ ) { 773 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 774 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 775 } 776 aj = Aloc->j; 777 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 778 779 /* copy over the B part */ 780 Aloc = (Mat_SeqAIJ*) aij->B->data; 781 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 782 row = aij->rstart; 783 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 784 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 785 for ( i=0; 
i<m; i++ ) { 786 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 787 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 788 } 789 PetscFree(ct); 790 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 791 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 792 if (!rank) { 793 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 794 } 795 ierr = MatDestroy(A); CHKERRQ(ierr); 796 } 797 } 798 return 0; 799 } 800 801 #undef __FUNC__ 802 #define __FUNC__ "MatView_MPIAIJ" /* ADIC Ignore */ 803 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 804 { 805 Mat mat = (Mat) obj; 806 int ierr; 807 ViewerType vtype; 808 809 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 810 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 811 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 812 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 813 } 814 else if (vtype == BINARY_FILE_VIEWER) { 815 return MatView_MPIAIJ_Binary(mat,viewer); 816 } 817 return 0; 818 } 819 820 /* 821 This has to provide several versions. 822 823 2) a) use only local smoothing updating outer values only once. 824 b) local smoothing updating outer values each inner iteration 825 3) color updating out values betwen colors. 
 */
#undef __FUNC__
#define __FUNC__ "MatRelax_MPIAIJ"
/*
   MatRelax_MPIAIJ - SOR/relaxation for the parallel AIJ format.

   Only the "local" sweep variants are supported: each process relaxes
   its own rows using the most recent off-process values of xx, which
   are scattered into mat->lvec once before the sweeps begin.  Across
   processes this is therefore block-Jacobi with SOR inside each block.

   Input:  matin - the MPIAIJ matrix; bb - right-hand side; omega -
           relaxation factor; flag - which sweep(s) to do; fshift -
           diagonal shift; its - number of iterations.
   In/out: xx - the iterate (also the initial guess unless
           SOR_ZERO_INITIAL_GUESS is set).
*/
static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                           double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     *b,*x,*xs,*ls,d,*v,sum;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  /* diag[] (offset of the diagonal entry in each row of A) is built lazily */
  if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* zero guess => no off-process contributions yet; delegate the whole
         relaxation to the sequential kernel on the diagonal block */
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    /* bring current off-process values of xx into the local work vector */
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        /* sum accumulates b[i] minus the full row product (SPARSEDENSEMDOT
           subtracts; defined in src/inline/spops.h -- NOTE(review): update
           uses the macro's subtract convention, so the diagonal term removed
           by the macro is added back here; confirm against spops.h */
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else {
    SETERRQ(1,0,"Option not supported");
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIAIJ" /* ADIC Ignore */
/*
   MatGetInfo_MPIAIJ - Collects nonzero/memory statistics by summing the
   MAT_LOCAL info of the diagonal (A) and off-diagonal (B) blocks, then
   reducing across the communicator with MAX or SUM as requested.
*/
static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr;
  double     isend[5], irecv[5];

  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  /* NOTE(review): columns_local is set to the GLOBAL column count N, not the
     local n; this matches MatGetLocalSize_MPIAIJ below but looks suspect --
     confirm whether N is intended here */
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
  /* accumulate B's counts on top of A's */
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  return 0;
}

extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIAIJ" /* ADIC Ignore */
/*
   MatSetOption_MPIAIJ - Applies a matrix option, forwarding it to the
   sequential A (diagonal) and B (off-diagonal) blocks where relevant,
   recording orientation/stash flags locally, and logging or erroring
   for options that do not apply to this format.
*/
static int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_UNSORTED ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_NEW_NONZERO_LOCATION_ERROR) {
    /* pass straight through to both sequential blocks */
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS)
    PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) {
    /* suppress stashing of values destined for other processes */
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS)
    {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
  else
    {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIAIJ" /* ADIC Ignore */
/* MatGetSize_MPIAIJ - Returns the global dimensions M x N. */
static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIAIJ" /* ADIC Ignore */
/*
   MatGetLocalSize_MPIAIJ - Returns the local row count and the column
   count.  NOTE(review): the column value returned is the GLOBAL N, not
   the local n stored in the struct; this matches MatGetInfo_MPIAIJ but
   differs from the cowners decomposition kept elsewhere -- confirm
   intended semantics before changing.
*/
static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->m; *n = mat->N;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIAIJ" /* ADIC Ignore */
/* MatGetOwnershipRange_MPIAIJ - Returns [rstart, rend), this process's rows. */
static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIAIJ"
/*
   MatGetRow_MPIAIJ - Returns one locally owned row as global column
   indices and values, merging the A (diagonal) and B (off-diagonal)
   blocks into a single array sorted by increasing global column
   (assuming each block's row is already sorted).  idx and/or v may be
   NULL if the caller does not need them.  Must be paired with
   MatRestoreRow_MPIAIJ; only one row may be "gotten" at a time.
*/
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
        (one buffer holds the Scalar values followed by the int indices)
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int        max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows")
  lrow = row - rstart;

  /* only request from the blocks what the caller actually wants */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray; /* local B column -> global column */
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1; /* number of B entries whose global column < cstart */
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries left of the diagonal block come first ... */
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* ... then all of A, then the remaining B entries */
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          /* v was NULL, so imark was not computed above; find it now */
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    }
    else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIAIJ" /* ADIC Ignore */
/*
   MatRestoreRow_MPIAIJ - Releases the row obtained with MatGetRow_MPIAIJ.
   The work buffers are cached on the matrix, so only the active flag is
   cleared here.
*/
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(1,0,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIAIJ"
/*
   MatNorm_MPIAIJ - Computes the Frobenius, 1- (max column) or infinity-
   (max row) norm of the parallel matrix.  The 1-norm path allocates two
   length-N work arrays, so it is not scalable in the global column count.
   The 2-norm is not supported.
*/
static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of squares over both blocks, then a global SUM and sqrt */
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double)); /* tmp2 is fully written by Allreduce */
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        /* A stores local columns; offset by cstart to get global columns */
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        /* B stores compressed columns; garray maps them to global columns */
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      /* rows are distributed, so a MAX reduction gives the global row norm */
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,0,"No support for two norm");
    }
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - Forms the transpose by inserting each local row
   of A and B as a column of a new MPIAIJ matrix (created with no
   preallocation, so this relies on dynamic allocation during assembly).
   If matout is PETSC_NULL the matrix must be square and the result
   replaces the input ("in-place" by destroying the old data structures
   and copying the new header over the old one).
*/
static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  Mat        B;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  if (matout == PETSC_NULL && M != N)
    SETERRQ(1,0,"Square matrix only for in-place");
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
                         PETSC_NULL,&B); CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* temporarily convert A's local column indices to global, in place */
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    /* insert row `row' of this matrix as a column of B */
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  /* restore A's local column indices */
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  /* B's indices are mapped through garray into a scratch array instead of
     being modified in place */
  ct   = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose .... but free data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIAIJ"
/*
   MatDiagonalScale_MPIAIJ - Computes diag(ll)*A*diag(rr).  Either vector
   may be NULL.  The right-scaling of the off-diagonal block needs
   off-process entries of rr, so the scatter of rr into lvec is started
   first and overlapped with the left scaling and the diagonal-block
   scaling.
*/
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat        a = aij->A, b = aij->B;
  int        ierr,s1,s2,s3;

  ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
  if (rr) {
    s3 = aij->n; /* rr must conform to the local column decomposition */
    VecGetLocalSize_Fast(rr,s1);
    if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
  }
  if (ll) {
    VecGetLocalSize_Fast(ll,s1);
    if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
    /* left-scale the off-diagonal block (rows are local, no communication) */
    ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
    ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
  }

  return 0;
}


extern int MatPrintHelp_SeqAIJ(Mat);
#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIAIJ" /* ADIC Ignore */
/* MatPrintHelp_MPIAIJ - Prints the SeqAIJ option help, on rank 0 only. */
static int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;

  if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
  else return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIAIJ" /* ADIC Ignore */
/* MatGetBlockSize_MPIAIJ - AIJ is a point (scalar) format: block size 1. */
static int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  *bs = 1;
  return 0;
}
#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIAIJ" /* ADIC Ignore */
/* MatSetUnfactored_MPIAIJ - Resets the factored state of the diagonal block. */
static int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
  int        ierr;
  ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
  return 0;
}

static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
/* -------------------------------------------------------------------*/
/* Function table for the MPIAIJ type; slot order is fixed by struct _MatOps,
   zeros mark unimplemented operations. */
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,
       0,0,
       0,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       0,0,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       0,0,
       0,0,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,0,0,0,
       MatGetBlockSize_MPIAIJ,0,0,0,0,
       MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ};


#undef __FUNC__
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
           (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           diagonal portion of the local submatrix (possibly different for each row)
           or PETSC_NULL.  You must leave room for the diagonal entry even if
           it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of local
           submatrix (same for all local rows).
.  o_nnz - array containing the number of nonzeros in the various rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode  - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit.
$        (max limit=5)
$    -mat_aij_oneindex - Internally use indexing starting at 1
$        rather than 0.  Note: When calling MatSetValues(),
$        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

$            0 1 2 3 4 5 6 7 8 9 10 11
$           -------------------
$    row 3  |  o o o d d d o o o o o o
$    row 4  |  o o o d d d o o o o o o
$    row 5  |  o o o d d d o o o o o o
$           -------------------
$

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr, i,sum[2],work[2],size;

  *A = 0;
  PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(B);
  B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
  MPI_Comm_size(comm,&size);
  if (size == 1) {
    /* on one process the sequential factorization/solve kernels apply */
    B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
    B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
    B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
    B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
    B->ops.lufactor          = MatLUFactor_MPIAIJ;
    B->ops.solve             = MatSolve_MPIAIJ;
    B->ops.solveadd          = MatSolveAdd_MPIAIJ;
    B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
    B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
    B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
  }
  B->destroy   = MatDestroy_MPIAIJ;
  B->view      = MatView_MPIAIJ;
  B->factor    = 0;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  B->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
    SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");

  /* fill in any missing global/local dimensions */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2; /* cowners shares the allocation */
  MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
  b->rowners[0] = 0;
  /* prefix-sum the gathered row counts into ownership offsets */
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* A is the m x n diagonal block, B the m x N off-diagonal block
     (B is created with full global width; columns are compressed at
     assembly time) */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  *A = B;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatConvertSameType_MPIAIJ"
/*
   MatConvertSameType_MPIAIJ - Duplicates an assembled MPIAIJ matrix,
   deep-copying the ownership tables, colmap/garray, the local blocks
   (via MatConvert with MATSAME), the scatter context and the local work
   vector.  NOTE(review): cpvalues is never consulted -- values appear to
   be copied unconditionally; also donotstash/roworiented are not copied
   from oldmat -- confirm both are intended.
*/
static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIAIJ;
  mat->view      = MatView_MPIAIJ;
  mat->factor    = matin->factor;
  mat->assembled = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  /* one memcpy covers both rowners and cowners (shared allocation) */
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  return 0;
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIAIJ"
/*
   MatLoad_MPIAIJ - Loads a matrix stored in PETSc binary format from a
   viewer into a new MPIAIJ matrix.  Rank 0 reads the header, row
   lengths, column indices and values, keeps its own share and ships
   each other process its share with point-to-point messages; row
   lengths are scattered with MPI_Scatterv.  Off-diagonal counts are
   derived locally so the matrix can be preallocated exactly.
*/
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows (same formula as MatCreateMPIAIJ) */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart); /* second half of the same allocation */
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices*/
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix: ourlens is temporarily reduced to the diagonal-only
     counts for preallocation, then restored to full row lengths */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out; note the values use the
       matrix tag A->tag while the column indices above used the viewer
       tag -- each send matches its receive below */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values*/
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}