1 #ifndef lint 2 static char vcid[] = "$Id: mpiaij.c,v 1.196 1997/03/29 00:59:34 balay Exp balay $"; 3 #endif 4 5 #include "pinclude/pviewer.h" 6 #include "src/mat/impls/aij/mpi/mpiaij.h" 7 #include "src/vec/vecimpl.h" 8 #include "src/inline/spops.h" 9 10 /* local utility routine that creates a mapping from the global column 11 number to the local number in the off-diagonal part of the local 12 storage of the matrix. This is done in a non scable way since the 13 length of colmap equals the global matrix length. 14 */ 15 #undef __FUNC__ 16 #define __FUNC__ "CreateColmap_MPIAIJ_Private" /* ADIC Ignore */ 17 int CreateColmap_MPIAIJ_Private(Mat mat) 18 { 19 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 20 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 21 int n = B->n,i; 22 23 aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap); 24 PLogObjectMemory(mat,aij->N*sizeof(int)); 25 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 26 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 27 return 0; 28 } 29 30 extern int DisAssemble_MPIAIJ(Mat); 31 32 #undef __FUNC__ 33 #define __FUNC__ "MatGetRowIJ_MPIAIJ" 34 int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 35 PetscTruth *done) 36 { 37 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 38 int ierr; 39 if (aij->size == 1) { 40 ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 41 } else SETERRQ(1,0,"not supported in parallel"); 42 return 0; 43 } 44 45 #undef __FUNC__ 46 #define __FUNC__ "MatRestoreRowIJ_MPIAIJ" /* ADIC Ignore */ 47 int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 48 PetscTruth *done) 49 { 50 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 51 int ierr; 52 if (aij->size == 1) { 53 ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 54 } else SETERRQ(1,0,"not supported in parallel"); 55 return 0; 56 } 57 58 #define CHUNKSIZE 15 59 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 60 { \ 61 \ 62 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 63 rmax = aimax[row]; nrow = ailen[row]; \ 64 col1 = col - shift; \ 65 \ 66 for ( _i=0; _i<nrow; _i++ ) { \ 67 if (rp[_i] > col1) break; \ 68 if (rp[_i] == col1) { \ 69 if (addv == ADD_VALUES) ap[_i] += value; \ 70 else ap[_i] = value; \ 71 goto a_noinsert; \ 72 } \ 73 } \ 74 if (nonew) goto a_noinsert; \ 75 if (nrow >= rmax) { \ 76 /* there is no extra room in row, therefore enlarge */ \ 77 int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 78 Scalar *new_a; \ 79 \ 80 /* malloc new storage space */ \ 81 len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 82 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 83 new_j = (int *) (new_a + new_nz); \ 84 new_i = new_j + new_nz; \ 85 \ 86 /* copy over old data into new slots */ \ 87 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 88 for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 89 PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 90 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 91 PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 92 len*sizeof(int)); \ 93 PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 94 PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 95 len*sizeof(Scalar)); \ 96 /* free up old matrix storage */ \ 97 \ 98 PetscFree(a->a); \ 99 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 100 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 101 a->singlemalloc = 1; \ 102 \ 103 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 104 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 105 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 106 a->maxnz += CHUNKSIZE; \ 107 a->reallocs++; \ 108 } \ 109 N = nrow++ - 1; a->nz++; \ 110 /* shift up all the later entries in this row */ \ 111 for ( ii=N; ii>=_i; ii-- ) { \ 112 rp[ii+1] = rp[ii]; \ 113 ap[ii+1] = ap[ii]; \ 114 } \ 115 rp[_i] = col1; \ 116 ap[_i] = value; \ 117 a_noinsert: ; \ 118 ailen[row] = nrow; \ 119 } 120 121 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 122 { \ 123 \ 124 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 125 rmax = bimax[row]; nrow = bilen[row]; \ 126 col1 = col - shift; \ 127 \ 128 for ( _i=0; _i<nrow; _i++ ) { \ 129 if (rp[_i] > col1) break; \ 130 if (rp[_i] == col1) { \ 131 if (addv == ADD_VALUES) ap[_i] += value; \ 132 else ap[_i] = value; \ 133 goto b_noinsert; \ 134 } \ 135 } \ 136 if (nonew) goto b_noinsert; \ 137 if (nrow >= rmax) { \ 138 /* there is no extra room in row, therefore enlarge */ \ 139 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 140 Scalar *new_a; \ 141 \ 142 /* malloc new storage space */ \ 143 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 144 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 145 new_j = (int *) (new_a + new_nz); \ 146 new_i = new_j + new_nz; \ 147 \ 148 /* copy over old data into new slots */ \ 149 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 150 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 151 PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 152 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 153 PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 154 len*sizeof(int)); \ 155 PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 156 PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 157 len*sizeof(Scalar)); \ 158 /* free up old matrix storage */ \ 159 \ 160 PetscFree(b->a); \ 161 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 162 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 163 b->singlemalloc = 1; \ 164 \ 165 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 166 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 167 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 168 b->maxnz += CHUNKSIZE; \ 169 b->reallocs++; \ 170 } \ 171 N = nrow++ - 1; b->nz++; \ 172 /* shift up all the later entries in this row */ \ 173 for ( ii=N; ii>=_i; ii-- ) { \ 174 rp[ii+1] = rp[ii]; \ 175 ap[ii+1] = ap[ii]; \ 176 } \ 177 rp[_i] = col1; \ 178 ap[_i] = value; \ 179 b_noinsert: ; \ 180 bilen[row] = nrow; \ 181 } 182 183 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 184 #undef __FUNC__ 185 #define __FUNC__ "MatSetValues_MPIAIJ" 186 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 187 { 188 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 189 Scalar value; 190 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 191 int cstart = aij->cstart, cend = aij->cend,row,col; 192 int roworiented = aij->roworiented; 193 194 /* Some Variables required in the macro */ 195 Mat A = aij->A; 196 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 197 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 198 Scalar *aa = a->a; 199 200 Mat B = aij->B; 201 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 202 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 203 Scalar *ba = b->a; 204 205 int *rp,ii,nrow,_i,rmax, N, col1; 206 int nonew = a->nonew,shift = a->indexshift; 207 Scalar *ap; 208 209 for ( i=0; i<m; i++ ) { 210 #if defined(PETSC_BOPT_g) 211 if (im[i] < 0) SETERRQ(1,0,"Negative row"); 212 if (im[i] >= aij->M) SETERRQ(1,0,"Row too large"); 213 #endif 214 if (im[i] >= rstart && im[i] < rend) { 215 row = im[i] - rstart; 216 for ( j=0; j<n; j++ ) { 217 if (in[j] >= cstart && in[j] < cend){ 218 col = in[j] - cstart; 219 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 220 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 221 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 222 } 223 #if defined(PETSC_BOPT_g) 224 else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 225 else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");} 226 #endif 227 else { 228 if (mat->was_assembled) { 229 if (!aij->colmap) { 230 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 231 } 232 col = aij->colmap[in[j]] - 1; 233 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 234 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 235 col = in[j]; 236 } 237 } 238 else col = in[j]; 239 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 240 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 241 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 242 } 243 } 244 } 245 else { 246 if (roworiented && !aij->donotstash) { 247 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 248 } 249 else { 250 if (!aij->donotstash) { 251 row = im[i]; 252 for ( j=0; j<n; j++ ) { 253 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 254 } 255 } 256 } 257 } 258 } 259 return 0; 260 } 261 262 #undef __FUNC__ 263 #define __FUNC__ "MatGetValues_MPIAIJ" 264 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 265 { 266 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 267 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 268 int cstart = aij->cstart, cend = aij->cend,row,col; 269 270 for ( i=0; i<m; i++ ) { 271 if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 272 if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large"); 273 if (idxm[i] >= rstart && idxm[i] < rend) { 274 row = idxm[i] - rstart; 275 for ( j=0; j<n; j++ ) { 276 if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 277 if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large"); 278 if (idxn[j] >= cstart && idxn[j] < cend){ 279 col = idxn[j] - cstart; 280 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 281 } 282 else { 283 if (!aij->colmap) { 284 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 285 } 286 col = aij->colmap[idxn[j]] - 1; 287 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 288 else { 289 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 290 } 291 } 292 } 293 } 294 else { 295 SETERRQ(1,0,"Only local values currently supported"); 296 } 297 } 298 return 0; 299 } 300 301 #undef __FUNC__ 302 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 303 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 304 { 305 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 306 MPI_Comm comm = mat->comm; 307 int size = aij->size, *owners = aij->rowners; 308 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 309 MPI_Request *send_waits,*recv_waits; 310 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 311 InsertMode addv; 312 Scalar *rvalues,*svalues; 313 314 /* make sure all processors are either in INSERTMODE or ADDMODE */ 315 MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 316 if (addv == (ADD_VALUES|INSERT_VALUES)) { 317 SETERRQ(1,0,"Some processors inserted others added"); 318 } 319 mat->insertmode = addv; /* in case this processor had no cache */ 320 321 /* first count number of contributors to each processor */ 322 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 323 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 324 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 325 for ( i=0; i<aij->stash.n; i++ ) { 326 idx = aij->stash.idx[i]; 327 for ( j=0; j<size; j++ ) { 328 if (idx >= owners[j] && idx < owners[j+1]) { 329 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 330 } 331 } 332 } 333 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 334 335 /* inform other processors of number of messages and max length*/ 336 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 337 MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 338 nreceives = work[rank]; 339 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 340 nmax = work[rank]; 341 PetscFree(work); 342 343 /* post receives: 344 1) each message will consist of ordered pairs 345 (global index,value) we store the global index as a double 346 to simplify the message passing. 347 2) since we don't know how long each individual message is we 348 allocate the largest needed buffer for each receive. Potentially 349 this is a lot of wasted space. 350 351 352 This could be done better. 353 */ 354 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 355 CHKPTRQ(rvalues); 356 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 357 CHKPTRQ(recv_waits); 358 for ( i=0; i<nreceives; i++ ) { 359 MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 360 comm,recv_waits+i); 361 } 362 363 /* do sends: 364 1) starts[i] gives the starting index in svalues for stuff going to 365 the ith processor 366 */ 367 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 368 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 369 CHKPTRQ(send_waits); 370 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 371 starts[0] = 0; 372 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 373 for ( i=0; i<aij->stash.n; i++ ) { 374 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 375 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 376 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 377 } 378 PetscFree(owner); 379 starts[0] = 0; 380 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 381 count = 0; 382 for ( i=0; i<size; i++ ) { 383 if (procs[i]) { 384 MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 385 comm,send_waits+count++); 386 } 387 } 388 PetscFree(starts); PetscFree(nprocs); 389 390 /* Free cache space */ 391 PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 392 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 393 394 aij->svalues = svalues; aij->rvalues = rvalues; 395 aij->nsends = nsends; aij->nrecvs = nreceives; 396 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 397 aij->rmax = nmax; 398 399 return 0; 400 } 401 extern int MatSetUpMultiply_MPIAIJ(Mat); 402 403 #undef __FUNC__ 404 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 405 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 406 { 407 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 408 MPI_Status *send_status,recv_status; 409 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 410 int row,col,other_disassembled; 411 Scalar *values,val; 412 InsertMode addv = mat->insertmode; 413 414 /* wait on receives */ 415 while (count) { 416 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 417 /* unpack receives into our local space */ 418 values = aij->rvalues + 3*imdex*aij->rmax; 419 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 420 n = n/3; 421 for ( i=0; i<n; i++ ) { 422 row = (int) PetscReal(values[3*i]) - aij->rstart; 423 col = (int) PetscReal(values[3*i+1]); 424 val = values[3*i+2]; 425 if (col >= aij->cstart && col < aij->cend) { 426 col -= aij->cstart; 427 MatSetValues(aij->A,1,&row,1,&col,&val,addv); 428 } 429 else { 430 if (mat->was_assembled || mat->assembled) { 431 if (!aij->colmap) { 432 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 433 } 434 col = aij->colmap[col] - 1; 435 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 436 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 437 col = (int) PetscReal(values[3*i+1]); 438 } 439 } 440 MatSetValues(aij->B,1,&row,1,&col,&val,addv); 441 } 442 } 443 count--; 444 } 445 PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 446 447 /* wait on sends */ 448 if (aij->nsends) { 449 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 450 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 451 PetscFree(send_status); 452 } 453 PetscFree(aij->send_waits); PetscFree(aij->svalues); 454 455 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 456 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 457 458 /* determine if any processor has disassembled, if so we must 459 also disassemble ourselfs, in order that we may reassemble. */ 460 MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 461 if (mat->was_assembled && !other_disassembled) { 462 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 463 } 464 465 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 466 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 467 } 468 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 469 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 470 471 if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 472 return 0; 473 } 474 475 #undef __FUNC__ 476 #define __FUNC__ "MatZeroEntries_MPIAIJ" 477 int MatZeroEntries_MPIAIJ(Mat A) 478 { 479 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 480 int ierr; 481 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 482 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 483 return 0; 484 } 485 486 /* the code does not do the diagonal entries correctly unless the 487 matrix is square and the column and row owerships are identical. 488 This is a BUG. The only way to fix it seems to be to access 489 aij->A and aij->B directly and not through the MatZeroRows() 490 routine. 491 */ 492 #undef __FUNC__ 493 #define __FUNC__ "MatZeroRows_MPIAIJ" 494 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 495 { 496 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 497 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 498 int *procs,*nprocs,j,found,idx,nsends,*work; 499 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 500 int *rvalues,tag = A->tag,count,base,slen,n,*source; 501 int *lens,imdex,*lrows,*values; 502 MPI_Comm comm = A->comm; 503 MPI_Request *send_waits,*recv_waits; 504 MPI_Status recv_status,*send_status; 505 IS istmp; 506 507 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 508 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 509 510 /* first count number of contributors to each processor */ 511 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 512 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 513 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 514 for ( i=0; i<N; i++ ) { 515 idx = rows[i]; 516 found = 0; 517 for ( j=0; j<size; j++ ) { 518 if (idx >= owners[j] && idx < owners[j+1]) { 519 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 520 } 521 } 522 if (!found) SETERRQ(1,0,"Index out of range"); 523 } 524 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 525 526 /* inform other processors of number of messages and max length*/ 527 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 528 MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 529 nrecvs = work[rank]; 530 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 531 nmax = work[rank]; 532 PetscFree(work); 533 534 /* post receives: */ 535 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 536 CHKPTRQ(rvalues); 537 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 538 CHKPTRQ(recv_waits); 539 for ( i=0; i<nrecvs; i++ ) { 540 MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 541 } 542 543 /* do sends: 544 1) starts[i] gives the starting index in svalues for stuff going to 545 the ith processor 546 */ 547 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 548 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 549 CHKPTRQ(send_waits); 550 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 551 starts[0] = 0; 552 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 553 for ( i=0; i<N; i++ ) { 554 svalues[starts[owner[i]]++] = rows[i]; 555 } 556 ISRestoreIndices(is,&rows); 557 558 starts[0] = 0; 559 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 560 count = 0; 561 for ( i=0; i<size; i++ ) { 562 if (procs[i]) { 563 MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 564 } 565 } 566 PetscFree(starts); 567 568 base = owners[rank]; 569 570 /* wait on receives */ 571 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 572 source = lens + nrecvs; 573 count = nrecvs; slen = 0; 574 while (count) { 575 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 576 /* unpack receives into our local space */ 577 MPI_Get_count(&recv_status,MPI_INT,&n); 578 source[imdex] = recv_status.MPI_SOURCE; 579 lens[imdex] = n; 580 slen += n; 581 count--; 582 } 583 PetscFree(recv_waits); 584 585 /* move the data into the send scatter */ 586 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 587 count = 0; 588 for ( i=0; i<nrecvs; i++ ) { 589 values = rvalues + i*nmax; 590 for ( j=0; j<lens[i]; j++ ) { 591 lrows[count++] = values[j] - base; 592 } 593 } 594 PetscFree(rvalues); PetscFree(lens); 595 PetscFree(owner); PetscFree(nprocs); 596 597 /* actually zap the local rows */ 598 ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 599 PLogObjectParent(A,istmp); 600 PetscFree(lrows); 601 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 602 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 603 ierr = ISDestroy(istmp); CHKERRQ(ierr); 604 605 /* wait on sends */ 606 if (nsends) { 607 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 608 CHKPTRQ(send_status); 609 MPI_Waitall(nsends,send_waits,send_status); 610 PetscFree(send_status); 611 } 612 PetscFree(send_waits); PetscFree(svalues); 613 614 return 0; 615 } 616 617 #undef __FUNC__ 618 #define __FUNC__ "MatMult_MPIAIJ" 619 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 620 { 621 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 622 int ierr,nt; 623 624 ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 625 if (nt != a->n) { 626 SETERRQ(1,0,"Incompatible partition of A and xx"); 627 } 628 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 629 ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 630 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 631 ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 632 return 0; 633 } 634 635 #undef __FUNC__ 636 #define __FUNC__ "MatMultAdd_MPIAIJ" 637 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 638 { 639 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 640 int ierr; 641 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 642 ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 643 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 644 ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 645 return 0; 646 } 647 648 #undef __FUNC__ 649 #define __FUNC__ "MatMultTrans_MPIAIJ" 650 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 651 { 652 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 653 int ierr; 654 655 /* do nondiagonal part */ 656 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 657 /* send it on its way */ 658 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 659 /* do local part */ 660 ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 661 /* receive remote parts: note this assumes the values are not actually */ 662 /* inserted in yy until the next line, which is true for my implementation*/ 663 /* but is not perhaps always true. */ 664 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 665 return 0; 666 } 667 668 #undef __FUNC__ 669 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 670 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 671 { 672 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 673 int ierr; 674 675 /* do nondiagonal part */ 676 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 677 /* send it on its way */ 678 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 679 /* do local part */ 680 ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 681 /* receive remote parts: note this assumes the values are not actually */ 682 /* inserted in yy until the next line, which is true for my implementation*/ 683 /* but is not perhaps always true. */ 684 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 685 return 0; 686 } 687 688 /* 689 This only works correctly for square matrices where the subblock A->A is the 690 diagonal block 691 */ 692 #undef __FUNC__ 693 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 694 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 695 { 696 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 697 if (a->M != a->N) 698 SETERRQ(1,0,"Supports only square matrix where A->A is diag block"); 699 if (a->rstart != a->cstart || a->rend != a->cend) { 700 SETERRQ(1,0,"row partition must equal col partition"); } 701 return MatGetDiagonal(a->A,v); 702 } 703 704 #undef __FUNC__ 705 #define __FUNC__ "MatScale_MPIAIJ" 706 int MatScale_MPIAIJ(Scalar *aa,Mat A) 707 { 708 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 709 int ierr; 710 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 711 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 712 return 0; 713 } 714 715 #undef __FUNC__ 716 #define __FUNC__ "MatDestroy_MPIAIJ" /* ADIC Ignore */ 717 int MatDestroy_MPIAIJ(PetscObject obj) 718 { 719 Mat mat = (Mat) obj; 720 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 721 int ierr; 722 #if defined(PETSC_LOG) 723 PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 724 #endif 725 PetscFree(aij->rowners); 726 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 727 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 728 if (aij->colmap) PetscFree(aij->colmap); 729 if (aij->garray) PetscFree(aij->garray); 730 if (aij->lvec) VecDestroy(aij->lvec); 731 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 732 if (aij->rowvalues) PetscFree(aij->rowvalues); 733 PetscFree(aij); 734 if (mat->mapping) { 735 ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 736 } 737 PLogObjectDestroy(mat); 738 PetscHeaderDestroy(mat); 739 return 0; 740 } 741 742 #undef __FUNC__ 743 #define __FUNC__ "MatView_MPIAIJ_Binary" /* ADIC Ignore */ 744 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 745 { 746 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 747 int ierr; 748 749 if (aij->size == 1) { 750 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 751 } 752 else SETERRQ(1,0,"Only uniprocessor output supported"); 753 return 0; 754 } 755 756 #undef __FUNC__ 757 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" /* ADIC Ignore */ 758 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 759 { 760 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 761 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 762 int ierr, format,shift = C->indexshift,rank; 763 FILE *fd; 764 ViewerType vtype; 765 766 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 767 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 768 ierr = ViewerGetFormat(viewer,&format); 769 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 770 MatInfo info; 771 int flg; 772 MPI_Comm_rank(mat->comm,&rank); 773 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 774 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 775 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 776 PetscSequentialPhaseBegin(mat->comm,1); 777 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 778 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 779 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 780 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 781 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 782 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 783 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 784 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 785 fflush(fd); 786 PetscSequentialPhaseEnd(mat->comm,1); 787 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 788 return 0; 789 } 790 else if (format == VIEWER_FORMAT_ASCII_INFO) { 791 return 0; 792 } 793 } 794 795 if (vtype == DRAW_VIEWER) { 796 Draw draw; 797 PetscTruth isnull; 798 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 799 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 800 } 801 802 if (vtype == ASCII_FILE_VIEWER) { 803 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 804 PetscSequentialPhaseBegin(mat->comm,1); 805 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 806 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 807 aij->cend); 808 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 809 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 810 fflush(fd); 811 PetscSequentialPhaseEnd(mat->comm,1); 812 } 813 else { 814 int size = aij->size; 815 rank = aij->rank; 816 if (size == 1) { 817 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 818 } 819 else { 820 /* assemble the entire matrix onto first processor. */ 821 Mat A; 822 Mat_SeqAIJ *Aloc; 823 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 824 Scalar *a; 825 826 if (!rank) { 827 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 828 CHKERRQ(ierr); 829 } 830 else { 831 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 832 CHKERRQ(ierr); 833 } 834 PLogObjectParent(mat,A); 835 836 /* copy over the A part */ 837 Aloc = (Mat_SeqAIJ*) aij->A->data; 838 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 839 row = aij->rstart; 840 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 841 for ( i=0; i<m; i++ ) { 842 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 843 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 844 } 845 aj = Aloc->j; 846 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 847 848 /* copy over the B part */ 849 Aloc = (Mat_SeqAIJ*) aij->B->data; 850 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 851 row = aij->rstart; 852 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 853 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 854 for ( i=0; i<m; i++ ) { 855 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 856 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 857 } 858 PetscFree(ct); 859 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 860 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 861 if (!rank) { 862 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 863 } 864 ierr = MatDestroy(A); CHKERRQ(ierr); 865 } 866 } 867 return 0; 868 } 869 870 #undef __FUNC__ 871 #define __FUNC__ "MatView_MPIAIJ" /* ADIC Ignore */ 872 int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 873 { 874 Mat mat = (Mat) obj; 875 int ierr; 876 ViewerType vtype; 877 878 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 879 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 880 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 881 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 882 } 883 else if (vtype == BINARY_FILE_VIEWER) { 884 return MatView_MPIAIJ_Binary(mat,viewer); 885 } 886 return 0; 887 } 888 889 /* 890 This has to provide several versions. 891 892 2) a) use only local smoothing updating outer values only once. 893 b) local smoothing updating outer values each inner iteration 894 3) color updating out values betwen colors. 895 */ 896 #undef __FUNC__ 897 #define __FUNC__ "MatRelax_MPIAIJ" 898 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 899 double fshift,int its,Vec xx) 900 { 901 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 902 Mat AA = mat->A, BB = mat->B; 903 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 904 Scalar *b,*x,*xs,*ls,d,*v,sum; 905 int ierr,*idx, *diag; 906 int n = mat->n, m = mat->m, i,shift = A->indexshift; 907 908 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 909 xs = x + shift; /* shift by one for index start of 1 */ 910 ls = ls + shift; 911 if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 912 diag = A->diag; 913 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 914 if (flag & SOR_ZERO_INITIAL_GUESS) { 915 return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 916 } 917 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 918 CHKERRQ(ierr); 919 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 920 CHKERRQ(ierr); 921 while (its--) { 922 /* go down through the rows */ 923 for ( i=0; i<m; i++ ) { 924 n = A->i[i+1] - A->i[i]; 925 idx = A->j + A->i[i] + shift; 926 v = A->a + A->i[i] + shift; 927 sum = b[i]; 928 SPARSEDENSEMDOT(sum,xs,v,idx,n); 929 d = fshift + A->a[diag[i]+shift]; 930 n = B->i[i+1] - B->i[i]; 931 idx = B->j + B->i[i] + shift; 932 v = B->a + B->i[i] + shift; 933 SPARSEDENSEMDOT(sum,ls,v,idx,n); 934 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 935 } 936 /* come up through the rows */ 937 for ( i=m-1; i>-1; i-- ) { 938 n = A->i[i+1] - A->i[i]; 939 idx = A->j + A->i[i] + shift; 940 v = A->a + A->i[i] + shift; 941 sum = b[i]; 942 SPARSEDENSEMDOT(sum,xs,v,idx,n); 943 d = fshift + A->a[diag[i]+shift]; 944 n = B->i[i+1] - B->i[i]; 945 idx = B->j + B->i[i] + shift; 946 v = B->a + B->i[i] + shift; 947 SPARSEDENSEMDOT(sum,ls,v,idx,n); 948 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 949 } 950 } 951 } 952 else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 953 if (flag & SOR_ZERO_INITIAL_GUESS) { 954 return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 955 } 956 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 957 CHKERRQ(ierr); 958 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx); 959 CHKERRQ(ierr); 960 while (its--) { 961 for ( i=0; i<m; i++ ) { 962 n = A->i[i+1] - A->i[i]; 963 idx = A->j + A->i[i] + shift; 964 v = A->a + A->i[i] + shift; 965 sum = b[i]; 966 SPARSEDENSEMDOT(sum,xs,v,idx,n); 967 d = fshift + A->a[diag[i]+shift]; 968 n = B->i[i+1] - B->i[i]; 969 idx = B->j + B->i[i] + shift; 970 v = B->a + B->i[i] + shift; 971 SPARSEDENSEMDOT(sum,ls,v,idx,n); 972 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 973 } 974 } 975 } 976 else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 977 if (flag & SOR_ZERO_INITIAL_GUESS) { 978 return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx); 979 } 980 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 981 mat->Mvctx); CHKERRQ(ierr); 982 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 983 mat->Mvctx); CHKERRQ(ierr); 984 while (its--) { 985 for ( i=m-1; i>-1; i-- ) { 986 n = A->i[i+1] - A->i[i]; 987 idx = A->j + A->i[i] + shift; 988 v = A->a + A->i[i] + shift; 989 sum = b[i]; 990 SPARSEDENSEMDOT(sum,xs,v,idx,n); 991 d = fshift + A->a[diag[i]+shift]; 992 n = B->i[i+1] - B->i[i]; 993 idx = B->j + B->i[i] + shift; 994 v = B->a + B->i[i] + shift; 995 SPARSEDENSEMDOT(sum,ls,v,idx,n); 996 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 997 } 998 } 999 } 1000 else { 1001 SETERRQ(1,0,"Option not supported"); 1002 } 1003 return 0; 1004 } 1005 1006 #undef __FUNC__ 1007 #define __FUNC__ "MatGetInfo_MPIAIJ" /* ADIC Ignore */ 1008 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1009 { 1010 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1011 Mat A = mat->A, B = mat->B; 1012 int ierr; 1013 double isend[5], irecv[5]; 1014 1015 info->rows_global = (double)mat->M; 1016 info->columns_global = (double)mat->N; 1017 info->rows_local = (double)mat->m; 1018 info->columns_local = (double)mat->N; 1019 info->block_size = 1.0; 1020 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1021 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1022 isend[3] = info->memory; isend[4] = info->mallocs; 1023 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1024 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1025 isend[3] += info->memory; isend[4] += info->mallocs; 1026 if (flag == MAT_LOCAL) { 1027 info->nz_used = isend[0]; 1028 info->nz_allocated = isend[1]; 1029 info->nz_unneeded = isend[2]; 1030 info->memory = isend[3]; 1031 info->mallocs = isend[4]; 1032 } else if (flag == MAT_GLOBAL_MAX) { 1033 MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm); 1034 info->nz_used = irecv[0]; 1035 info->nz_allocated = irecv[1]; 1036 info->nz_unneeded = irecv[2]; 1037 info->memory = irecv[3]; 1038 info->mallocs = irecv[4]; 1039 } else if (flag == MAT_GLOBAL_SUM) { 1040 MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm); 1041 info->nz_used = irecv[0]; 1042 info->nz_allocated = irecv[1]; 1043 info->nz_unneeded = irecv[2]; 1044 info->memory = irecv[3]; 1045 info->mallocs = irecv[4]; 1046 } 1047 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1048 info->fill_ratio_needed = 0; 1049 info->factor_mallocs = 0; 1050 1051 return 0; 1052 } 1053 1054 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 1055 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 1056 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 1057 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 1058 extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 1059 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1060 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 1061 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 1062 1063 #undef __FUNC__ 1064 #define __FUNC__ "MatSetOption_MPIAIJ" /* ADIC Ignore */ 1065 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1066 { 1067 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1068 1069 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1070 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1071 op == MAT_COLUMNS_UNSORTED || 1072 op == MAT_COLUMNS_SORTED || 1073 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1074 MatSetOption(a->A,op); 1075 MatSetOption(a->B,op); 1076 } else if (op == MAT_ROW_ORIENTED) { 1077 a->roworiented = 1; 1078 MatSetOption(a->A,op); 1079 MatSetOption(a->B,op); 1080 } else if (op == MAT_ROWS_SORTED || 1081 op == MAT_ROWS_UNSORTED || 1082 op == MAT_SYMMETRIC || 1083 op == MAT_STRUCTURALLY_SYMMETRIC || 1084 op == MAT_YES_NEW_DIAGONALS) 1085 PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 1086 else if (op == MAT_COLUMN_ORIENTED) { 1087 a->roworiented = 0; 1088 MatSetOption(a->A,op); 1089 MatSetOption(a->B,op); 1090 } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) { 1091 a->donotstash = 1; 1092 } else if (op == MAT_NO_NEW_DIAGONALS) 1093 {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 1094 else 1095 {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 1096 return 0; 1097 } 1098 1099 #undef __FUNC__ 1100 #define __FUNC__ "MatGetSize_MPIAIJ" /* ADIC Ignore */ 1101 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1102 { 1103 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1104 *m = mat->M; *n = mat->N; 1105 return 0; 1106 } 1107 1108 #undef __FUNC__ 1109 #define __FUNC__ "MatGetLocalSize_MPIAIJ" /* ADIC Ignore */ 1110 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1111 { 1112 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1113 *m = mat->m; *n = mat->N; 1114 return 0; 1115 } 1116 1117 #undef __FUNC__ 1118 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" /* ADIC Ignore */ 1119 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1120 { 1121 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1122 *m = mat->rstart; *n = mat->rend; 1123 return 0; 1124 } 1125 1126 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1127 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1128 1129 #undef __FUNC__ 1130 #define __FUNC__ "MatGetRow_MPIAIJ" 1131 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1132 { 1133 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1134 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1135 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1136 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1137 int *cmap, *idx_p; 1138 1139 if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active"); 1140 mat->getrowactive = PETSC_TRUE; 1141 1142 if (!mat->rowvalues && (idx || v)) { 1143 /* 1144 allocate enough space to hold information from the longest row. 1145 */ 1146 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1147 int max = 1,m = mat->m,tmp; 1148 for ( i=0; i<m; i++ ) { 1149 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1150 if (max < tmp) { max = tmp; } 1151 } 1152 mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 1153 CHKPTRQ(mat->rowvalues); 1154 mat->rowindices = (int *) (mat->rowvalues + max); 1155 } 1156 1157 if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows") 1158 lrow = row - rstart; 1159 1160 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1161 if (!v) {pvA = 0; pvB = 0;} 1162 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1163 ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1164 ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1165 nztot = nzA + nzB; 1166 1167 cmap = mat->garray; 1168 if (v || idx) { 1169 if (nztot) { 1170 /* Sort by increasing column numbers, assuming A and B already sorted */ 1171 int imark = -1; 1172 if (v) { 1173 *v = v_p = mat->rowvalues; 1174 for ( i=0; i<nzB; i++ ) { 1175 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1176 else break; 1177 } 1178 imark = i; 1179 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1180 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1181 } 1182 if (idx) { 1183 *idx = idx_p = mat->rowindices; 1184 if (imark > -1) { 1185 for ( i=0; i<imark; i++ ) { 1186 idx_p[i] = cmap[cworkB[i]]; 1187 } 1188 } else { 1189 for ( i=0; i<nzB; i++ ) { 1190 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1191 else break; 1192 } 1193 imark = i; 1194 } 1195 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1196 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1197 } 1198 } 1199 else { 1200 if (idx) *idx = 0; 1201 if (v) *v = 0; 1202 } 1203 } 1204 *nz = nztot; 1205 ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1206 ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1207 return 0; 1208 } 1209 1210 #undef __FUNC__ 1211 #define __FUNC__ "MatRestoreRow_MPIAIJ" /* ADIC Ignore */ 1212 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1213 { 1214 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1215 if (aij->getrowactive == PETSC_FALSE) { 1216 SETERRQ(1,0,"MatGetRow not called"); 1217 } 1218 aij->getrowactive = PETSC_FALSE; 1219 return 0; 1220 } 1221 1222 #undef __FUNC__ 1223 #define __FUNC__ "MatNorm_MPIAIJ" 1224 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1225 { 1226 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1227 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1228 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1229 double sum = 0.0; 1230 Scalar *v; 1231 1232 if (aij->size == 1) { 1233 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1234 } else { 1235 if (type == NORM_FROBENIUS) { 1236 v = amat->a; 1237 for (i=0; i<amat->nz; i++ ) { 1238 #if defined(PETSC_COMPLEX) 1239 sum += real(conj(*v)*(*v)); v++; 1240 #else 1241 sum += (*v)*(*v); v++; 1242 #endif 1243 } 1244 v = bmat->a; 1245 for (i=0; i<bmat->nz; i++ ) { 1246 #if defined(PETSC_COMPLEX) 1247 sum += real(conj(*v)*(*v)); v++; 1248 #else 1249 sum += (*v)*(*v); v++; 1250 #endif 1251 } 1252 MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 1253 *norm = sqrt(*norm); 1254 } 1255 else if (type == NORM_1) { /* max column norm */ 1256 double *tmp, *tmp2; 1257 int *jj, *garray = aij->garray; 1258 tmp = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp); 1259 tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1260 PetscMemzero(tmp,aij->N*sizeof(double)); 1261 *norm = 0.0; 1262 v = amat->a; jj = amat->j; 1263 for ( j=0; j<amat->nz; j++ ) { 1264 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1265 } 1266 v = bmat->a; jj = bmat->j; 1267 for ( j=0; j<bmat->nz; j++ ) { 1268 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1269 } 1270 MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 1271 for ( j=0; j<aij->N; j++ ) { 1272 if (tmp2[j] > *norm) *norm = tmp2[j]; 1273 } 1274 PetscFree(tmp); PetscFree(tmp2); 1275 } 1276 else if (type == NORM_INFINITY) { /* max row norm */ 1277 double ntemp = 0.0; 1278 for ( j=0; j<amat->m; j++ ) { 1279 v = amat->a + amat->i[j] + shift; 1280 sum = 0.0; 1281 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1282 sum += PetscAbsScalar(*v); v++; 1283 } 1284 v = bmat->a + bmat->i[j] + shift; 1285 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1286 sum += PetscAbsScalar(*v); v++; 1287 } 1288 if (sum > ntemp) ntemp = sum; 1289 } 1290 MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 1291 } 1292 else { 1293 SETERRQ(1,0,"No support for two norm"); 1294 } 1295 } 1296 return 0; 1297 } 1298 1299 #undef __FUNC__ 1300 #define __FUNC__ "MatTranspose_MPIAIJ" 1301 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1302 { 1303 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1304 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1305 int ierr,shift = Aloc->indexshift; 1306 Mat B; 1307 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1308 Scalar *array; 1309 1310 if (matout == PETSC_NULL && M != N) 1311 SETERRQ(1,0,"Square matrix only for in-place"); 1312 ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0, 1313 PETSC_NULL,&B); CHKERRQ(ierr); 1314 1315 /* copy over the A part */ 1316 Aloc = (Mat_SeqAIJ*) a->A->data; 1317 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1318 row = a->rstart; 1319 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1320 for ( i=0; i<m; i++ ) { 1321 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1322 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1323 } 1324 aj = Aloc->j; 1325 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1326 1327 /* copy over the B part */ 1328 Aloc = (Mat_SeqAIJ*) a->B->data; 1329 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1330 row = a->rstart; 1331 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1332 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1333 for ( i=0; i<m; i++ ) { 1334 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1335 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1336 } 1337 PetscFree(ct); 1338 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1339 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1340 if (matout != PETSC_NULL) { 1341 *matout = B; 1342 } else { 1343 /* This isn't really an in-place transpose .... but free data structures from a */ 1344 PetscFree(a->rowners); 1345 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1346 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1347 if (a->colmap) PetscFree(a->colmap); 1348 if (a->garray) PetscFree(a->garray); 1349 if (a->lvec) VecDestroy(a->lvec); 1350 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1351 PetscFree(a); 1352 PetscMemcpy(A,B,sizeof(struct _Mat)); 1353 PetscHeaderDestroy(B); 1354 } 1355 return 0; 1356 } 1357 1358 #undef __FUNC__ 1359 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1360 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1361 { 1362 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1363 Mat a = aij->A, b = aij->B; 1364 int ierr,s1,s2,s3; 1365 1366 ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 1367 if (rr) { 1368 s3 = aij->n; 1369 VecGetLocalSize_Fast(rr,s1); 1370 if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size"); 1371 /* Overlap communication with computation. */ 1372 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1373 } 1374 if (ll) { 1375 VecGetLocalSize_Fast(ll,s1); 1376 if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size"); 1377 ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr); 1378 } 1379 /* scale the diagonal block */ 1380 ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr); 1381 1382 if (rr) { 1383 /* Do a scatter end and then right scale the off-diagonal block */ 1384 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1385 ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 1386 } 1387 1388 return 0; 1389 } 1390 1391 1392 extern int MatPrintHelp_SeqAIJ(Mat); 1393 #undef __FUNC__ 1394 #define __FUNC__ "MatPrintHelp_MPIAIJ" /* ADIC Ignore */ 1395 int MatPrintHelp_MPIAIJ(Mat A) 1396 { 1397 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1398 1399 if (!a->rank) return MatPrintHelp_SeqAIJ(a->A); 1400 else return 0; 1401 } 1402 1403 #undef __FUNC__ 1404 #define __FUNC__ "MatGetBlockSize_MPIAIJ" /* ADIC Ignore */ 1405 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1406 { 1407 *bs = 1; 1408 return 0; 1409 } 1410 #undef __FUNC__ 1411 #define __FUNC__ "MatSetUnfactored_MPIAIJ" /* ADIC Ignore */ 1412 int MatSetUnfactored_MPIAIJ(Mat A) 1413 { 1414 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1415 int ierr; 1416 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1417 return 0; 1418 } 1419 1420 extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 1421 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1422 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1423 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 1424 /* -------------------------------------------------------------------*/ 1425 static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 1426 MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 1427 MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 1428 MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1429 0,0, 1430 0,0, 1431 0,0, 1432 MatRelax_MPIAIJ, 1433 MatTranspose_MPIAIJ, 1434 MatGetInfo_MPIAIJ,0, 1435 MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ, 1436 MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 1437 0, 1438 MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1439 0,0,0,0, 1440 MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1441 0,0, 1442 0,0,MatConvertSameType_MPIAIJ,0,0, 1443 0,0,0, 1444 MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1445 MatPrintHelp_MPIAIJ, 1446 MatScale_MPIAIJ,0,0,0, 1447 MatGetBlockSize_MPIAIJ,0,0,0,0, 1448 MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ}; 1449 1450 1451 #undef __FUNC__ 1452 #define __FUNC__ "MatCreateMPIAIJ" 1453 /*@C 1454 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1455 (the default parallel PETSc format). For good matrix assembly performance 1456 the user should preallocate the matrix storage by setting the parameters 1457 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1458 performance can be increased by more than a factor of 50. 1459 1460 Input Parameters: 1461 . comm - MPI communicator 1462 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1463 This value should be the same as the local size used in creating the 1464 y vector for the matrix-vector product y = Ax. 1465 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1466 This value should be the same as the local size used in creating the 1467 x vector for the matrix-vector product y = Ax. 1468 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1469 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1470 . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1471 (same for all local rows) 1472 . d_nzz - array containing the number of nonzeros in the various rows of the 1473 diagonal portion of the local submatrix (possibly different for each row) 1474 or PETSC_NULL. You must leave room for the diagonal entry even if 1475 it is zero. 1476 . o_nz - number of nonzeros per row in the off-diagonal portion of local 1477 submatrix (same for all local rows). 1478 . o_nzz - array containing the number of nonzeros in the various rows of the 1479 off-diagonal portion of the local submatrix (possibly different for 1480 each row) or PETSC_NULL. 1481 1482 Output Parameter: 1483 . A - the matrix 1484 1485 Notes: 1486 The AIJ format (also called the Yale sparse matrix format or 1487 compressed row storage), is fully compatible with standard Fortran 77 1488 storage. That is, the stored row and column indices can begin at 1489 either one (as in Fortran) or zero. See the users manual for details. 1490 1491 The user MUST specify either the local or global matrix dimensions 1492 (possibly both). 1493 1494 By default, this format uses inodes (identical nodes) when possible. 1495 We search for consecutive rows with the same nonzero structure, thereby 1496 reusing matrix information to achieve increased efficiency. 1497 1498 Options Database Keys: 1499 $ -mat_aij_no_inode - Do not use inodes 1500 $ -mat_aij_inode_limit <limit> - Set inode limit. 1501 $ (max limit=5) 1502 $ -mat_aij_oneindex - Internally use indexing starting at 1 1503 $ rather than 0. Note: When calling MatSetValues(), 1504 $ the user still MUST index entries starting at 0! 1505 1506 Storage Information: 1507 For a square global matrix we define each processor's diagonal portion 1508 to be its local rows and the corresponding columns (a square submatrix); 1509 each processor's off-diagonal portion encompasses the remainder of the 1510 local matrix (a rectangular submatrix). 1511 1512 The user can specify preallocated storage for the diagonal part of 1513 the local submatrix with either d_nz or d_nnz (not both). Set 1514 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1515 memory allocation. Likewise, specify preallocated storage for the 1516 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1517 1518 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1519 the figure below we depict these three local rows and all columns (0-11). 1520 1521 $ 0 1 2 3 4 5 6 7 8 9 10 11 1522 $ ------------------- 1523 $ row 3 | o o o d d d o o o o o o 1524 $ row 4 | o o o d d d o o o o o o 1525 $ row 5 | o o o d d d o o o o o o 1526 $ ------------------- 1527 $ 1528 1529 Thus, any entries in the d locations are stored in the d (diagonal) 1530 submatrix, and any entries in the o locations are stored in the 1531 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1532 stored simply in the MATSEQAIJ format for compressed row storage. 1533 1534 Now d_nz should indicate the number of nonzeros per row in the d matrix, 1535 and o_nz should indicate the number of nonzeros per row in the o matrix. 1536 In general, for PDE problems in which most nonzeros are near the diagonal, 1537 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1538 or you will get TERRIBLE performance; see the users' manual chapter on 1539 matrices. 1540 1541 .keywords: matrix, aij, compressed row, sparse, parallel 1542 1543 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1544 @*/ 1545 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 1546 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1547 { 1548 Mat B; 1549 Mat_MPIAIJ *b; 1550 int ierr, i,sum[2],work[2],size; 1551 1552 *A = 0; 1553 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1554 PLogObjectCreate(B); 1555 B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 1556 PetscMemzero(b,sizeof(Mat_MPIAIJ)); 1557 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1558 MPI_Comm_size(comm,&size); 1559 if (size == 1) { 1560 B->ops.getrowij = MatGetRowIJ_MPIAIJ; 1561 B->ops.restorerowij = MatRestoreRowIJ_MPIAIJ; 1562 B->ops.lufactorsymbolic = MatLUFactorSymbolic_MPIAIJ; 1563 B->ops.lufactornumeric = MatLUFactorNumeric_MPIAIJ; 1564 B->ops.lufactor = MatLUFactor_MPIAIJ; 1565 B->ops.solve = MatSolve_MPIAIJ; 1566 B->ops.solveadd = MatSolveAdd_MPIAIJ; 1567 B->ops.solvetrans = MatSolveTrans_MPIAIJ; 1568 B->ops.solvetransadd = MatSolveTransAdd_MPIAIJ; 1569 B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ; 1570 } 1571 B->destroy = MatDestroy_MPIAIJ; 1572 B->view = MatView_MPIAIJ; 1573 B->factor = 0; 1574 B->assembled = PETSC_FALSE; 1575 B->mapping = 0; 1576 1577 B->insertmode = NOT_SET_VALUES; 1578 MPI_Comm_rank(comm,&b->rank); 1579 MPI_Comm_size(comm,&b->size); 1580 1581 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 1582 SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1583 1584 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1585 work[0] = m; work[1] = n; 1586 MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1587 if (M == PETSC_DECIDE) M = sum[0]; 1588 if (N == PETSC_DECIDE) N = sum[1]; 1589 } 1590 if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 1591 if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 1592 b->m = m; B->m = m; 1593 b->n = n; B->n = n; 1594 b->N = N; B->N = N; 1595 b->M = M; B->M = M; 1596 1597 /* build local table of row and column ownerships */ 1598 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1599 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1600 b->cowners = b->rowners + b->size + 2; 1601 MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 1602 b->rowners[0] = 0; 1603 for ( i=2; i<=b->size; i++ ) { 1604 b->rowners[i] += b->rowners[i-1]; 1605 } 1606 b->rstart = b->rowners[b->rank]; 1607 b->rend = b->rowners[b->rank+1]; 1608 MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 1609 b->cowners[0] = 0; 1610 for ( i=2; i<=b->size; i++ ) { 1611 b->cowners[i] += b->cowners[i-1]; 1612 } 1613 b->cstart = b->cowners[b->rank]; 1614 b->cend = b->cowners[b->rank+1]; 1615 1616 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1617 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1618 PLogObjectParent(B,b->A); 1619 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1620 ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1621 PLogObjectParent(B,b->B); 1622 1623 /* build cache for off array entries formed */ 1624 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1625 b->donotstash = 0; 1626 b->colmap = 0; 1627 b->garray = 0; 1628 b->roworiented = 1; 1629 1630 /* stuff used for matrix vector multiply */ 1631 b->lvec = 0; 1632 b->Mvctx = 0; 1633 1634 /* stuff for MatGetRow() */ 1635 b->rowindices = 0; 1636 b->rowvalues = 0; 1637 b->getrowactive = PETSC_FALSE; 1638 1639 *A = B; 1640 return 0; 1641 } 1642 1643 #undef __FUNC__ 1644 #define __FUNC__ "MatConvertSameType_MPIAIJ" 1645 int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1646 { 1647 Mat mat; 1648 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1649 int ierr, len=0, flg; 1650 1651 *newmat = 0; 1652 PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1653 PLogObjectCreate(mat); 1654 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1655 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1656 mat->destroy = MatDestroy_MPIAIJ; 1657 mat->view = MatView_MPIAIJ; 1658 mat->factor = matin->factor; 1659 mat->assembled = PETSC_TRUE; 1660 1661 a->m = mat->m = oldmat->m; 1662 a->n = mat->n = oldmat->n; 1663 a->M = mat->M = oldmat->M; 1664 a->N = mat->N = oldmat->N; 1665 1666 a->rstart = oldmat->rstart; 1667 a->rend = oldmat->rend; 1668 a->cstart = oldmat->cstart; 1669 a->cend = oldmat->cend; 1670 a->size = oldmat->size; 1671 a->rank = oldmat->rank; 1672 mat->insertmode = NOT_SET_VALUES; 1673 a->rowvalues = 0; 1674 a->getrowactive = PETSC_FALSE; 1675 1676 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1677 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1678 a->cowners = a->rowners + a->size + 2; 1679 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1680 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1681 if (oldmat->colmap) { 1682 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1683 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1684 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1685 } else a->colmap = 0; 1686 if (oldmat->garray) { 1687 len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 1688 a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1689 PLogObjectMemory(mat,len*sizeof(int)); 1690 if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1691 } else a->garray = 0; 1692 1693 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1694 PLogObjectParent(mat,a->lvec); 1695 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1696 PLogObjectParent(mat,a->Mvctx); 1697 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1698 PLogObjectParent(mat,a->A); 1699 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1700 PLogObjectParent(mat,a->B); 1701 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1702 if (flg) { 1703 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1704 } 1705 *newmat = mat; 1706 return 0; 1707 } 1708 1709 #include "sys.h" 1710 1711 #undef __FUNC__ 1712 #define __FUNC__ "MatLoad_MPIAIJ" 1713 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1714 { 1715 Mat A; 1716 int i, nz, ierr, j,rstart, rend, fd; 1717 Scalar *vals,*svals; 1718 MPI_Comm comm = ((PetscObject)viewer)->comm; 1719 MPI_Status status; 1720 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1721 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1722 int tag = ((PetscObject)viewer)->tag; 1723 1724 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1725 if (!rank) { 1726 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1727 ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1728 if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object"); 1729 } 1730 1731 MPI_Bcast(header+1,3,MPI_INT,0,comm); 1732 M = header[1]; N = header[2]; 1733 /* determine ownership of all rows */ 1734 m = M/size + ((M % size) > rank); 1735 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1736 MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1737 rowners[0] = 0; 1738 for ( i=2; i<=size; i++ ) { 1739 rowners[i] += rowners[i-1]; 1740 } 1741 rstart = rowners[rank]; 1742 rend = rowners[rank+1]; 1743 1744 /* distribute row lengths to all processors */ 1745 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1746 offlens = ourlens + (rend-rstart); 1747 if (!rank) { 1748 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1749 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 1750 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1751 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1752 MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 1753 PetscFree(sndcounts); 1754 } 1755 else { 1756 MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1757 } 1758 1759 if (!rank) { 1760 /* calculate the number of nonzeros on each processor */ 1761 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1762 PetscMemzero(procsnz,size*sizeof(int)); 1763 for ( i=0; i<size; i++ ) { 1764 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1765 procsnz[i] += rowlengths[j]; 1766 } 1767 } 1768 PetscFree(rowlengths); 1769 1770 /* determine max buffer needed and allocate it */ 1771 maxnz = 0; 1772 for ( i=0; i<size; i++ ) { 1773 maxnz = PetscMax(maxnz,procsnz[i]); 1774 } 1775 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1776 1777 /* read in my part of the matrix column indices */ 1778 nz = procsnz[0]; 1779 mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1780 ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1781 1782 /* read in every one elses and ship off */ 1783 for ( i=1; i<size; i++ ) { 1784 nz = procsnz[i]; 1785 ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 1786 MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1787 } 1788 PetscFree(cols); 1789 } 1790 else { 1791 /* determine buffer space needed for message */ 1792 nz = 0; 1793 for ( i=0; i<m; i++ ) { 1794 nz += ourlens[i]; 1795 } 1796 mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1797 1798 /* receive message of column indices*/ 1799 MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1800 MPI_Get_count(&status,MPI_INT,&maxnz); 1801 if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 1802 } 1803 1804 /* loop over local rows, determining number of off diagonal entries */ 1805 PetscMemzero(offlens,m*sizeof(int)); 1806 jj = 0; 1807 for ( i=0; i<m; i++ ) { 1808 for ( j=0; j<ourlens[i]; j++ ) { 1809 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1810 jj++; 1811 } 1812 } 1813 1814 /* create our matrix */ 1815 for ( i=0; i<m; i++ ) { 1816 ourlens[i] -= offlens[i]; 1817 } 1818 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1819 A = *newmat; 1820 MatSetOption(A,MAT_COLUMNS_SORTED); 1821 for ( i=0; i<m; i++ ) { 1822 ourlens[i] += offlens[i]; 1823 } 1824 1825 if (!rank) { 1826 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1827 1828 /* read in my part of the matrix numerical values */ 1829 nz = procsnz[0]; 1830 ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1831 1832 /* insert into matrix */ 1833 jj = rstart; 1834 smycols = mycols; 1835 svals = vals; 1836 for ( i=0; i<m; i++ ) { 1837 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1838 smycols += ourlens[i]; 1839 svals += ourlens[i]; 1840 jj++; 1841 } 1842 1843 /* read in other processors and ship out */ 1844 for ( i=1; i<size; i++ ) { 1845 nz = procsnz[i]; 1846 ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1847 MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1848 } 1849 PetscFree(procsnz); 1850 } 1851 else { 1852 /* receive numeric values */ 1853 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1854 1855 /* receive message of values*/ 1856 MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1857 MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1858 if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 1859 1860 /* insert into matrix */ 1861 jj = rstart; 1862 smycols = mycols; 1863 svals = vals; 1864 for ( i=0; i<m; i++ ) { 1865 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1866 smycols += ourlens[i]; 1867 svals += ourlens[i]; 1868 jj++; 1869 } 1870 } 1871 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1872 1873 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1874 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1875 return 0; 1876 } 1877