1 #ifndef lint 2 static char vcid[] = "$Id: mpiaij.c,v 1.201 1997/04/10 00:02:50 bsmith Exp curfman $"; 3 #endif 4 5 #include "pinclude/pviewer.h" 6 #include "src/mat/impls/aij/mpi/mpiaij.h" 7 #include "src/vec/vecimpl.h" 8 #include "src/inline/spops.h" 9 10 /* local utility routine that creates a mapping from the global column 11 number to the local number in the off-diagonal part of the local 12 storage of the matrix. This is done in a non scable way since the 13 length of colmap equals the global matrix length. 14 */ 15 #undef __FUNC__ 16 #define __FUNC__ "CreateColmap_MPIAIJ_Private" /* ADIC Ignore */ 17 int CreateColmap_MPIAIJ_Private(Mat mat) 18 { 19 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 20 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 21 int n = B->n,i; 22 23 aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 24 PLogObjectMemory(mat,aij->N*sizeof(int)); 25 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 26 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 27 return 0; 28 } 29 30 extern int DisAssemble_MPIAIJ(Mat); 31 32 #undef __FUNC__ 33 #define __FUNC__ "MatGetRowIJ_MPIAIJ" 34 int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 35 PetscTruth *done) 36 { 37 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 38 int ierr; 39 if (aij->size == 1) { 40 ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 41 } else SETERRQ(1,0,"not supported in parallel"); 42 return 0; 43 } 44 45 #undef __FUNC__ 46 #define __FUNC__ "MatRestoreRowIJ_MPIAIJ" /* ADIC Ignore */ 47 int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja, 48 PetscTruth *done) 49 { 50 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 51 int ierr; 52 if (aij->size == 1) { 53 ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr); 54 } else SETERRQ(1,0,"not supported in parallel"); 55 return 0; 56 } 57 58 #define CHUNKSIZE 15 59 #define 
MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 60 { \ 61 \ 62 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 63 rmax = aimax[row]; nrow = ailen[row]; \ 64 col1 = col - shift; \ 65 \ 66 for ( _i=0; _i<nrow; _i++ ) { \ 67 if (rp[_i] > col1) break; \ 68 if (rp[_i] == col1) { \ 69 if (addv == ADD_VALUES) ap[_i] += value; \ 70 else ap[_i] = value; \ 71 goto a_noinsert; \ 72 } \ 73 } \ 74 if (nonew) goto a_noinsert; \ 75 if (nrow >= rmax) { \ 76 /* there is no extra room in row, therefore enlarge */ \ 77 int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 78 Scalar *new_a; \ 79 \ 80 /* malloc new storage space */ \ 81 len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 82 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 83 new_j = (int *) (new_a + new_nz); \ 84 new_i = new_j + new_nz; \ 85 \ 86 /* copy over old data into new slots */ \ 87 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 88 for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 89 PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 90 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 91 PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 92 len*sizeof(int)); \ 93 PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 94 PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 95 len*sizeof(Scalar)); \ 96 /* free up old matrix storage */ \ 97 \ 98 PetscFree(a->a); \ 99 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 100 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 101 a->singlemalloc = 1; \ 102 \ 103 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 104 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 105 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 106 a->maxnz += CHUNKSIZE; \ 107 a->reallocs++; \ 108 } \ 109 N = nrow++ - 1; a->nz++; \ 110 /* shift up all the later entries in this row */ \ 111 for ( ii=N; ii>=_i; ii-- ) { \ 112 rp[ii+1] = rp[ii]; \ 113 
ap[ii+1] = ap[ii]; \ 114 } \ 115 rp[_i] = col1; \ 116 ap[_i] = value; \ 117 a_noinsert: ; \ 118 ailen[row] = nrow; \ 119 } 120 121 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 122 { \ 123 \ 124 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 125 rmax = bimax[row]; nrow = bilen[row]; \ 126 col1 = col - shift; \ 127 \ 128 for ( _i=0; _i<nrow; _i++ ) { \ 129 if (rp[_i] > col1) break; \ 130 if (rp[_i] == col1) { \ 131 if (addv == ADD_VALUES) ap[_i] += value; \ 132 else ap[_i] = value; \ 133 goto b_noinsert; \ 134 } \ 135 } \ 136 if (nonew) goto b_noinsert; \ 137 if (nrow >= rmax) { \ 138 /* there is no extra room in row, therefore enlarge */ \ 139 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 140 Scalar *new_a; \ 141 \ 142 /* malloc new storage space */ \ 143 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 144 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 145 new_j = (int *) (new_a + new_nz); \ 146 new_i = new_j + new_nz; \ 147 \ 148 /* copy over old data into new slots */ \ 149 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 150 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 151 PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 152 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 153 PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 154 len*sizeof(int)); \ 155 PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 156 PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 157 len*sizeof(Scalar)); \ 158 /* free up old matrix storage */ \ 159 \ 160 PetscFree(b->a); \ 161 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 162 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 163 b->singlemalloc = 1; \ 164 \ 165 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 166 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 167 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 168 b->maxnz += CHUNKSIZE; \ 
169 b->reallocs++; \ 170 } \ 171 N = nrow++ - 1; b->nz++; \ 172 /* shift up all the later entries in this row */ \ 173 for ( ii=N; ii>=_i; ii-- ) { \ 174 rp[ii+1] = rp[ii]; \ 175 ap[ii+1] = ap[ii]; \ 176 } \ 177 rp[_i] = col1; \ 178 ap[_i] = value; \ 179 b_noinsert: ; \ 180 bilen[row] = nrow; \ 181 } 182 183 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 184 #undef __FUNC__ 185 #define __FUNC__ "MatSetValues_MPIAIJ" 186 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 187 { 188 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 189 Scalar value; 190 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 191 int cstart = aij->cstart, cend = aij->cend,row,col; 192 int roworiented = aij->roworiented; 193 194 /* Some Variables required in the macro */ 195 Mat A = aij->A; 196 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 197 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 198 Scalar *aa = a->a; 199 200 Mat B = aij->B; 201 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 202 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 203 Scalar *ba = b->a; 204 205 int *rp,ii,nrow,_i,rmax, N, col1; 206 int nonew = a->nonew,shift = a->indexshift; 207 Scalar *ap; 208 209 for ( i=0; i<m; i++ ) { 210 #if defined(PETSC_BOPT_g) 211 if (im[i] < 0) SETERRQ(1,0,"Negative row"); 212 if (im[i] >= aij->M) SETERRQ(1,0,"Row too large"); 213 #endif 214 if (im[i] >= rstart && im[i] < rend) { 215 row = im[i] - rstart; 216 for ( j=0; j<n; j++ ) { 217 if (in[j] >= cstart && in[j] < cend){ 218 col = in[j] - cstart; 219 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 220 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 221 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 222 } 223 #if defined(PETSC_BOPT_g) 224 else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 225 else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");} 226 #endif 227 else { 228 if (mat->was_assembled) { 229 if 
(!aij->colmap) { 230 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 231 } 232 col = aij->colmap[in[j]] - 1; 233 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 234 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 235 col = in[j]; 236 } 237 } 238 else col = in[j]; 239 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 240 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 241 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 242 } 243 } 244 } 245 else { 246 if (roworiented && !aij->donotstash) { 247 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 248 } 249 else { 250 if (!aij->donotstash) { 251 row = im[i]; 252 for ( j=0; j<n; j++ ) { 253 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 254 } 255 } 256 } 257 } 258 } 259 return 0; 260 } 261 262 #undef __FUNC__ 263 #define __FUNC__ "MatGetValues_MPIAIJ" 264 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 265 { 266 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 267 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 268 int cstart = aij->cstart, cend = aij->cend,row,col; 269 270 for ( i=0; i<m; i++ ) { 271 if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 272 if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large"); 273 if (idxm[i] >= rstart && idxm[i] < rend) { 274 row = idxm[i] - rstart; 275 for ( j=0; j<n; j++ ) { 276 if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 277 if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large"); 278 if (idxn[j] >= cstart && idxn[j] < cend){ 279 col = idxn[j] - cstart; 280 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 281 } 282 else { 283 if (!aij->colmap) { 284 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 285 } 286 col = aij->colmap[idxn[j]] - 1; 287 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 288 else { 289 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 290 } 291 } 292 } 293 } 294 else { 295 
SETERRQ(1,0,"Only local values currently supported"); 296 } 297 } 298 return 0; 299 } 300 301 #undef __FUNC__ 302 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 303 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 304 { 305 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 306 MPI_Comm comm = mat->comm; 307 int size = aij->size, *owners = aij->rowners; 308 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 309 MPI_Request *send_waits,*recv_waits; 310 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 311 InsertMode addv; 312 Scalar *rvalues,*svalues; 313 314 /* make sure all processors are either in INSERTMODE or ADDMODE */ 315 MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 316 if (addv == (ADD_VALUES|INSERT_VALUES)) { 317 SETERRQ(1,0,"Some processors inserted others added"); 318 } 319 mat->insertmode = addv; /* in case this processor had no cache */ 320 321 /* first count number of contributors to each processor */ 322 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 323 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 324 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 325 for ( i=0; i<aij->stash.n; i++ ) { 326 idx = aij->stash.idx[i]; 327 for ( j=0; j<size; j++ ) { 328 if (idx >= owners[j] && idx < owners[j+1]) { 329 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 330 } 331 } 332 } 333 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 334 335 /* inform other processors of number of messages and max length*/ 336 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 337 MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 338 nreceives = work[rank]; 339 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 340 nmax = work[rank]; 341 PetscFree(work); 342 343 /* post receives: 344 1) each message will consist of ordered pairs 345 (global index,value) we store the global index as a double 346 to simplify the message passing. 
347 2) since we don't know how long each individual message is we 348 allocate the largest needed buffer for each receive. Potentially 349 this is a lot of wasted space. 350 351 352 This could be done better. 353 */ 354 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 355 CHKPTRQ(rvalues); 356 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 357 CHKPTRQ(recv_waits); 358 for ( i=0; i<nreceives; i++ ) { 359 MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 360 comm,recv_waits+i); 361 } 362 363 /* do sends: 364 1) starts[i] gives the starting index in svalues for stuff going to 365 the ith processor 366 */ 367 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 368 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 369 CHKPTRQ(send_waits); 370 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 371 starts[0] = 0; 372 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 373 for ( i=0; i<aij->stash.n; i++ ) { 374 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 375 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 376 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 377 } 378 PetscFree(owner); 379 starts[0] = 0; 380 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 381 count = 0; 382 for ( i=0; i<size; i++ ) { 383 if (procs[i]) { 384 MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 385 comm,send_waits+count++); 386 } 387 } 388 PetscFree(starts); PetscFree(nprocs); 389 390 /* Free cache space */ 391 PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 392 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 393 394 aij->svalues = svalues; aij->rvalues = rvalues; 395 aij->nsends = nsends; aij->nrecvs = nreceives; 396 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 397 aij->rmax = nmax; 398 399 return 0; 400 } 
401 extern int MatSetUpMultiply_MPIAIJ(Mat); 402 403 #undef __FUNC__ 404 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 405 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 406 { 407 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 408 MPI_Status *send_status,recv_status; 409 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 410 int row,col,other_disassembled; 411 Scalar *values,val; 412 InsertMode addv = mat->insertmode; 413 414 /* wait on receives */ 415 while (count) { 416 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 417 /* unpack receives into our local space */ 418 values = aij->rvalues + 3*imdex*aij->rmax; 419 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 420 n = n/3; 421 for ( i=0; i<n; i++ ) { 422 row = (int) PetscReal(values[3*i]) - aij->rstart; 423 col = (int) PetscReal(values[3*i+1]); 424 val = values[3*i+2]; 425 if (col >= aij->cstart && col < aij->cend) { 426 col -= aij->cstart; 427 MatSetValues(aij->A,1,&row,1,&col,&val,addv); 428 } 429 else { 430 if (mat->was_assembled || mat->assembled) { 431 if (!aij->colmap) { 432 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 433 } 434 col = aij->colmap[col] - 1; 435 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 436 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 437 col = (int) PetscReal(values[3*i+1]); 438 } 439 } 440 MatSetValues(aij->B,1,&row,1,&col,&val,addv); 441 } 442 } 443 count--; 444 } 445 PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 446 447 /* wait on sends */ 448 if (aij->nsends) { 449 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 450 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 451 PetscFree(send_status); 452 } 453 PetscFree(aij->send_waits); PetscFree(aij->svalues); 454 455 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 456 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 457 458 /* determine if any processor has disassembled, if so we must 459 also disassemble ourselfs, in order that we may 
reassemble. */ 460 MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 461 if (mat->was_assembled && !other_disassembled) { 462 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 463 } 464 465 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 466 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 467 } 468 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 469 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 470 471 if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 472 return 0; 473 } 474 475 #undef __FUNC__ 476 #define __FUNC__ "MatZeroEntries_MPIAIJ" 477 int MatZeroEntries_MPIAIJ(Mat A) 478 { 479 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 480 int ierr; 481 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 482 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 483 return 0; 484 } 485 486 /* the code does not do the diagonal entries correctly unless the 487 matrix is square and the column and row owerships are identical. 488 This is a BUG. The only way to fix it seems to be to access 489 aij->A and aij->B directly and not through the MatZeroRows() 490 routine. 
491 */ 492 #undef __FUNC__ 493 #define __FUNC__ "MatZeroRows_MPIAIJ" 494 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 495 { 496 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 497 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 498 int *procs,*nprocs,j,found,idx,nsends,*work; 499 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 500 int *rvalues,tag = A->tag,count,base,slen,n,*source; 501 int *lens,imdex,*lrows,*values; 502 MPI_Comm comm = A->comm; 503 MPI_Request *send_waits,*recv_waits; 504 MPI_Status recv_status,*send_status; 505 IS istmp; 506 507 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 508 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 509 510 /* first count number of contributors to each processor */ 511 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 512 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 513 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 514 for ( i=0; i<N; i++ ) { 515 idx = rows[i]; 516 found = 0; 517 for ( j=0; j<size; j++ ) { 518 if (idx >= owners[j] && idx < owners[j+1]) { 519 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 520 } 521 } 522 if (!found) SETERRQ(1,0,"Index out of range"); 523 } 524 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 525 526 /* inform other processors of number of messages and max length*/ 527 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 528 MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 529 nrecvs = work[rank]; 530 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 531 nmax = work[rank]; 532 PetscFree(work); 533 534 /* post receives: */ 535 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 536 CHKPTRQ(rvalues); 537 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request)); 538 CHKPTRQ(recv_waits); 539 for ( i=0; i<nrecvs; i++ ) { 540 MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 541 } 542 543 /* do sends: 544 1) 
starts[i] gives the starting index in svalues for stuff going to 545 the ith processor 546 */ 547 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 548 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 549 CHKPTRQ(send_waits); 550 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 551 starts[0] = 0; 552 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 553 for ( i=0; i<N; i++ ) { 554 svalues[starts[owner[i]]++] = rows[i]; 555 } 556 ISRestoreIndices(is,&rows); 557 558 starts[0] = 0; 559 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 560 count = 0; 561 for ( i=0; i<size; i++ ) { 562 if (procs[i]) { 563 MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 564 } 565 } 566 PetscFree(starts); 567 568 base = owners[rank]; 569 570 /* wait on receives */ 571 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 572 source = lens + nrecvs; 573 count = nrecvs; slen = 0; 574 while (count) { 575 MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 576 /* unpack receives into our local space */ 577 MPI_Get_count(&recv_status,MPI_INT,&n); 578 source[imdex] = recv_status.MPI_SOURCE; 579 lens[imdex] = n; 580 slen += n; 581 count--; 582 } 583 PetscFree(recv_waits); 584 585 /* move the data into the send scatter */ 586 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 587 count = 0; 588 for ( i=0; i<nrecvs; i++ ) { 589 values = rvalues + i*nmax; 590 for ( j=0; j<lens[i]; j++ ) { 591 lrows[count++] = values[j] - base; 592 } 593 } 594 PetscFree(rvalues); PetscFree(lens); 595 PetscFree(owner); PetscFree(nprocs); 596 597 /* actually zap the local rows */ 598 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 599 PLogObjectParent(A,istmp); 600 PetscFree(lrows); 601 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 602 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 603 ierr = ISDestroy(istmp); 
CHKERRQ(ierr); 604 605 /* wait on sends */ 606 if (nsends) { 607 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status)); 608 CHKPTRQ(send_status); 609 MPI_Waitall(nsends,send_waits,send_status); 610 PetscFree(send_status); 611 } 612 PetscFree(send_waits); PetscFree(svalues); 613 614 return 0; 615 } 616 617 #undef __FUNC__ 618 #define __FUNC__ "MatMult_MPIAIJ" 619 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 620 { 621 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 622 int ierr,nt; 623 624 ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 625 if (nt != a->n) { 626 SETERRQ(1,0,"Incompatible partition of A and xx"); 627 } 628 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 629 ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 630 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 631 ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 632 return 0; 633 } 634 635 #undef __FUNC__ 636 #define __FUNC__ "MatMultAdd_MPIAIJ" 637 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 638 { 639 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 640 int ierr; 641 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 642 ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 643 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 644 ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 645 return 0; 646 } 647 648 #undef __FUNC__ 649 #define __FUNC__ "MatMultTrans_MPIAIJ" 650 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 651 { 652 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 653 int ierr; 654 655 /* do nondiagonal part */ 656 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 657 /* send it on its way */ 658 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 659 /* do local part */ 660 ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 661 /* receive remote parts: note this 
assumes the values are not actually */ 662 /* inserted in yy until the next line, which is true for my implementation*/ 663 /* but is not perhaps always true. */ 664 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 665 return 0; 666 } 667 668 #undef __FUNC__ 669 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 670 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 671 { 672 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 673 int ierr; 674 675 /* do nondiagonal part */ 676 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 677 /* send it on its way */ 678 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 679 /* do local part */ 680 ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 681 /* receive remote parts: note this assumes the values are not actually */ 682 /* inserted in yy until the next line, which is true for my implementation*/ 683 /* but is not perhaps always true. */ 684 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 685 return 0; 686 } 687 688 /* 689 This only works correctly for square matrices where the subblock A->A is the 690 diagonal block 691 */ 692 #undef __FUNC__ 693 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 694 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 695 { 696 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 697 if (a->M != a->N) 698 SETERRQ(1,0,"Supports only square matrix where A->A is diag block"); 699 if (a->rstart != a->cstart || a->rend != a->cend) { 700 SETERRQ(1,0,"row partition must equal col partition"); } 701 return MatGetDiagonal(a->A,v); 702 } 703 704 #undef __FUNC__ 705 #define __FUNC__ "MatScale_MPIAIJ" 706 int MatScale_MPIAIJ(Scalar *aa,Mat A) 707 { 708 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 709 int ierr; 710 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 711 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 712 return 0; 713 } 714 715 #undef __FUNC__ 716 #define __FUNC__ "MatDestroy_MPIAIJ" /* ADIC Ignore */ 717 int 
MatDestroy_MPIAIJ(PetscObject obj) 718 { 719 Mat mat = (Mat) obj; 720 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 721 int ierr; 722 #if defined(PETSC_LOG) 723 PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 724 #endif 725 PetscFree(aij->rowners); 726 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 727 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 728 if (aij->colmap) PetscFree(aij->colmap); 729 if (aij->garray) PetscFree(aij->garray); 730 if (aij->lvec) VecDestroy(aij->lvec); 731 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 732 if (aij->rowvalues) PetscFree(aij->rowvalues); 733 PetscFree(aij); 734 if (mat->mapping) { 735 ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 736 } 737 PLogObjectDestroy(mat); 738 PetscHeaderDestroy(mat); 739 return 0; 740 } 741 742 #undef __FUNC__ 743 #define __FUNC__ "MatView_MPIAIJ_Binary" /* ADIC Ignore */ 744 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 745 { 746 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 747 int ierr; 748 749 if (aij->size == 1) { 750 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 751 } 752 else SETERRQ(1,0,"Only uniprocessor output supported"); 753 return 0; 754 } 755 756 #undef __FUNC__ 757 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" /* ADIC Ignore */ 758 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 759 { 760 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 761 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 762 int ierr, format,shift = C->indexshift,rank; 763 FILE *fd; 764 ViewerType vtype; 765 766 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 767 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 768 ierr = ViewerGetFormat(viewer,&format); 769 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 770 MatInfo info; 771 int flg; 772 MPI_Comm_rank(mat->comm,&rank); 773 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 774 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 775 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 776 
PetscSequentialPhaseBegin(mat->comm,1); 777 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 778 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 779 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 780 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 781 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 782 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 783 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 784 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 785 fflush(fd); 786 PetscSequentialPhaseEnd(mat->comm,1); 787 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 788 return 0; 789 } 790 else if (format == VIEWER_FORMAT_ASCII_INFO) { 791 return 0; 792 } 793 } 794 795 if (vtype == DRAW_VIEWER) { 796 Draw draw; 797 PetscTruth isnull; 798 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 799 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 800 } 801 802 if (vtype == ASCII_FILE_VIEWER) { 803 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 804 PetscSequentialPhaseBegin(mat->comm,1); 805 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 806 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 807 aij->cend); 808 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 809 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 810 fflush(fd); 811 PetscSequentialPhaseEnd(mat->comm,1); 812 } 813 else { 814 int size = aij->size; 815 rank = aij->rank; 816 if (size == 1) { 817 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 818 } 819 else { 820 /* assemble the entire matrix onto first processor. 
 */
/* NOTE(review): the lines below are the tail of
   MatView_MPIAIJ_ASCIIorDraworMatlab(); its head (declarations of rank,
   aij, shift, viewer branching, and the enclosing braces) lies above this
   chunk and is unchanged.  This tail gathers the whole parallel matrix
   onto process 0 as a temporary MPIAIJ matrix A and views it there. */
  Mat        A;
  Mat_SeqAIJ *Aloc;
  int        M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *a;

  /* process 0 owns the entire gathered matrix; all others contribute 0 rows */
  if (!rank) {
    ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
    CHKERRQ(ierr);
  }
  else {
    ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
    CHKERRQ(ierr);
  }
  PLogObjectParent(mat,A);

  /* copy over the A (diagonal-block) part; column indices are temporarily
     translated to global numbering in place, then restored below */
  Aloc = (Mat_SeqAIJ*) aij->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  row = aij->rstart;
  /* shift is the CSR index shift (appears to be -1 for 1-based storage,
     0 for 0-based — see the "index start of 1" comment in MatRelax below);
     ai[m]+shift is the local number of nonzeros */
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
    row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  /* undo the in-place translation of the column indices */
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

  /* copy over the B (off-diagonal-block) part; garray maps the compressed
     local column numbers back to global column numbers */
  Aloc = (Mat_SeqAIJ*) aij->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  row = aij->rstart;
  ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
    row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  /* after assembly everything lives in A's diagonal block on process 0 */
  if (!rank) {
    ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
  }
  ierr = MatDestroy(A); CHKERRQ(ierr);
  }
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ" /* ADIC Ignore */
/*
   MatView_MPIAIJ - Viewer dispatch for parallel AIJ matrices: ASCII, draw
   and Matlab viewers share one routine; binary files use a separate one.
   Any other viewer type is silently ignored (returns 0).
*/
int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat         mat = (Mat) obj;
  int         ierr;
  ViewerType  vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    return MatView_MPIAIJ_Binary(mat,viewer);
  }
  return 0;
}

/*
    This has to provide several versions.

     2) a) use only local smoothing updating outer values only once.
        b) local smoothing updating outer values each inner iteration
     3) color updating out values between colors.
*/
#undef __FUNC__
#define __FUNC__ "MatRelax_MPIAIJ"
/*
   MatRelax_MPIAIJ - SOR/relaxation for the parallel AIJ matrix.

   Only the "local" variants are implemented: each sweep relaxes the local
   rows using the diagonal block A and the off-diagonal block B, where the
   off-process values needed by B are brought in once per call through the
   lvec/Mvctx scatter (i.e. outer values are updated only once, not per
   inner iteration).  With SOR_ZERO_INITIAL_GUESS the work is delegated
   entirely to the sequential relax of the diagonal block.

   Input: matin - the matrix; bb - right-hand side; omega - relaxation
   factor; flag - SOR variant flags; fshift - diagonal shift; its - number
   of sweeps; xx - solution vector (also input unless zero initial guess).
*/
int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                    double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     *b,*x,*xs,*ls,d,*v,sum;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  /* the diagonal-entry index array is built lazily on first use */
  if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    /* pull in the off-process entries of x needed by the B block */
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        /* sum -= (row of A) . x  accumulated over the diagonal block */
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        /* ... and over the off-diagonal block against the scattered values */
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        /* sum above included the diagonal term; add it back before dividing */
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
           mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else {
    SETERRQ(1,0,"Option not supported");
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIAIJ" /* ADIC Ignore */
/*
   MatGetInfo_MPIAIJ - Collects matrix statistics; the local numbers are the
   sums over the diagonal (A) and off-diagonal (B) blocks, and for the
   global flags they are combined across the communicator with MPI_MAX or
   MPI_SUM.  Note columns_local is set to the global N (the off-diagonal
   block logically spans all columns).
*/
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr;
  double     isend[5], irecv[5];

  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  /* accumulate A-block and B-block local statistics into isend[] */
  ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  return 0;
}

/* factor/solve routines, installed only in the uniprocessor case below */
extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIAIJ" /* ADIC Ignore */
/*
   MatSetOption_MPIAIJ - Applies an option either to both sequential blocks,
   to the parallel layer only (stashing, row orientation), or ignores it
   with a logged message; unsupported options raise an error.
*/
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_UNSORTED ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
      op == MAT_NEW_NONZERO_LOCATION_ERROR) {
    /* forwarded to both the diagonal and off-diagonal blocks */
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS)
    PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS)
    {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
  else
    {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIAIJ" /* ADIC Ignore */
/* Returns the global dimensions M x N. */
int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIAIJ" /* ADIC Ignore */
/* Returns the local row count and the GLOBAL column count (columns are
   not partitioned for the row-distributed AIJ format). */
int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->m; *n = mat->N;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIAIJ" /* ADIC Ignore */
/* Returns the half-open range [rstart, rend) of rows owned locally. */
int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIAIJ"
/*
   MatGetRow_MPIAIJ - Returns one locally owned row in global numbering by
   merging the row of the diagonal block A with the row of the off-diagonal
   block B (mapped through garray), keeping columns in increasing order
   under the assumption that both blocks are already sorted.  Only one row
   may be "gotten" at a time (guarded by getrowactive); the merged copies
   live in the lazily allocated rowvalues/rowindices workspace sized by the
   longest local row.  Non-local rows are an error.
*/
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int        max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation: Scalars first, then the int index area after them */
    mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows")
  lrow = row - rstart;

  /* pass NULL sub-requests down when the caller did not ask for idx/v */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries with global column < cstart come first ... */
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* ... then all A entries, then the remaining B entries */
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values */
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    }
    else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIAIJ" /* ADIC Ignore */
/* Releases the row obtained by MatGetRow_MPIAIJ (just clears the guard;
   the workspace is reused across calls). */
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(1,0,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIAIJ"
/*
   MatNorm_MPIAIJ - Frobenius, 1- (max column sum) and infinity- (max row
   sum) norms of the parallel matrix.  Uniprocessor matrices delegate to
   the sequential norm.  The 1-norm uses O(N) workspace (one double per
   global column) reduced with MPI_SUM; NORM_2 is not supported.
*/
int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      /* accumulate |a_ij| per global column: A columns offset by cstart,
         B columns mapped through garray */
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,0,"No support for two norm");
    }
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - Builds the transpose by re-inserting each local
   row as a column of a new N x M matrix (MatSetValues with the row/column
   arguments swapped).  With matout==PETSC_NULL (square matrices only) the
   old matrix's internals are destroyed and the new header is copied over
   the old one in place.
*/
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{ 
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  Mat        B;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  if (matout == PETSC_NULL && M != N)
    SETERRQ(1,0,"Square matrix only for in-place");
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
         PETSC_NULL,&B); CHKERRQ(ierr);

  /* copy over the A part: global column numbers are produced by an
     in-place shift of aj (undone afterwards), as in the viewer above */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  } 
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  } 
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose .... but free data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    /* graft B's contents onto A's header, then discard B's shell */
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIAIJ"
/*
   MatDiagonalScale_MPIAIJ - Computes diag(ll)*mat*diag(rr).  The right
   scaling of the off-diagonal block needs the off-process entries of rr,
   so the scatter of rr into lvec is started first and its completion is
   overlapped with the left scaling and the diagonal-block scaling.
*/
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat        a = aij->A, b = aij->B;
  int        ierr,s1,s2,s3;

  ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
  if (rr) {
    s3 = aij->n;
    VecGetLocalSize_Fast(rr,s1);
    if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
  }
  if (ll) {
    VecGetLocalSize_Fast(ll,s1);
    if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
    ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
    ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
  }

  return 0;
}


extern int MatPrintHelp_SeqAIJ(Mat);
#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIAIJ" /* ADIC Ignore */
/* Prints the runtime-option help on process 0 only (the sequential help
   covers the shared AIJ options). */
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;

  if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
  else return 0;
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIAIJ" /* ADIC Ignore */
/* AIJ is a point (non-blocked) format, so the block size is always 1. */
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  *bs = 1;
  return 0;
}
#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIAIJ" /* ADIC Ignore */
/* Clears the factored state of the diagonal block (the only block that is
   ever factored — see the size==1 factor hooks in MatCreateMPIAIJ). */
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
  int        ierr;
  ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
  return 0;
}

extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
/* -------------------------------------------------------------------*/
/* Function table for the MPIAIJ type; zero entries are unimplemented
   operations.  The slot order is fixed by struct _MatOps. */
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,
       0,0,
       0,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       0,0,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       0,0,
       0,0,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,0,0,0,
       MatGetBlockSize_MPIAIJ,0,0,0,0,
       MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ};


#undef __FUNC__
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
      This value should be the same as the local size used in creating the 
      y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the 
      x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
      calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
     (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the 
     diagonal portion of the local submatrix (possibly different for each row)
     or PETSC_NULL.  You must leave room for the diagonal entry even if
     it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of local
     submatrix (same for all local rows).
.  o_nnz - array containing the number of nonzeros in the various rows of the
     off-diagonal portion of the local submatrix (possibly different for
     each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix 

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode  - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit.
$        (max limit=5)
$    -mat_aij_oneindex - Internally use indexing starting at 1
$        rather than 0.  Note: When calling MatSetValues(),
$        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion 
   to be its local rows and the corresponding columns (a square submatrix);  
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix). 

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set 
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

$            0 1 2 3 4 5 6 7 8 9 10 11
$           -------------------
$   row 3  | o o o d d d o o o o o o
$   row 4  | o o o d d d o o o o o o
$   row 5  | o o o d d d o o o o o o
$           -------------------
$ 

   Thus, any entries in the d locations are stored in the d (diagonal) 
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIAIJ   *b;
  int          ierr, i,sum[2],work[2],size;

  *A = 0;
  PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(B);
  B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
  MPI_Comm_size(comm,&size);
  /* on one process the diagonal block IS the matrix, so the sequential
     factor/solve operations can be exposed directly */
  if (size == 1) {
    B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
    B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
    B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
    B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
    B->ops.lufactor          = MatLUFactor_MPIAIJ;
    B->ops.solve             = MatSolve_MPIAIJ;
    B->ops.solveadd          = MatSolveAdd_MPIAIJ;
    B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
    B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
    B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
  }
  B->destroy     = MatDestroy_MPIAIJ;
  B->view        = MatView_MPIAIJ;
  B->factor      = 0;
  B->assembled   = PETSC_FALSE;
  B->mapping     = 0;

  B->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  /* per-row preallocation arrays are meaningless without a known local size */
  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 
    SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  /* spread the remainder M%size over the first M%size processes */
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* build local table of row and column ownerships (prefix sums of the
     gathered local sizes; cowners shares the same allocation) */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* the diagonal block is m x n; the off-diagonal block is allocated with
     the full global column width m x N (columns are compressed later) */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  *A = B;
  return 0;
}

#undef __FUNC__
#define __FUNC__ "MatConvertSameType_MPIAIJ"
/*
   MatConvertSameType_MPIAIJ - Duplicates an (assembled) MPIAIJ matrix:
   copies the layout tables and converts the two sequential blocks via
   MatConvert(...,MATSAME,...).  NOTE(review): the cpvalues argument is
   not consulted anywhere in this routine — values appear to be copied
   unconditionally; confirm against the MatConvertSameType contract.
*/
int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  *newmat = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIAIJ;
  mat->view      = MatView_MPIAIJ;
  mat->factor    = matin->factor;
  mat->assembled = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  /* ownership tables: rowners and cowners live in one allocation */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  return 0;
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIAIJ"
/*
   MatLoad_MPIAIJ - Loads a matrix from a PETSc binary file into a new
   MPIAIJ matrix.  Process 0 reads the header, the row lengths (scattered
   with MPI_Scatterv), then the column indices and values, shipping each
   other process its contiguous slice with point-to-point messages; every
   process counts its off-diagonal entries first so the matrix can be
   preallocated exactly before the values are inserted.
*/
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows (same remainder-spreading rule as
     MatCreateMPIAIJ uses for PETSC_DECIDE) */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices*/
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix: ourlens is temporarily reduced to the diagonal-only
     counts for preallocation, then restored for the insertion loops */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values*/
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}