#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpiaij.c,v 1.286 1999/03/09 21:32:49 balay Exp balay $";
#endif

/*
   Parallel (MPI) AIJ sparse matrix implementation.  Each process stores its
   local rows as two sequential AIJ matrices: aij->A holds the "diagonal"
   block (columns owned by this process) and aij->B the "off-diagonal" block
   (all other columns, with columns renumbered compactly via aij->garray).
*/
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/* Sequential-AIJ and support routines defined elsewhere that we call directly */
extern int MatSetUpMultiply_MPIAIJ(Mat);
extern int DisAssemble_MPIAIJ(Mat);
extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatPrintHelp_SeqAIJ(Mat);

/* local utility routine that creates a mapping from the global column 
   number to the local number in the off-diagonal part of the local 
   storage of the matrix. This is done in a non scable way since the 
   length of colmap equals the global matrix length. 
*/
#undef __FUNC__  
#define __FUNC__ "CreateColmap_MPIAIJ_Private"
/*
   CreateColmap_MPIAIJ_Private - Builds aij->colmap: global column number ->
   (local off-diagonal column + 1).  Entries are stored shifted by +1 so that
   0 can mean "this global column is not present in the local B part".

   With USE_CTABLE a hash table is used (memory ~ local nonzero columns);
   otherwise a dense array of length aij->N (global columns) is allocated,
   which is the non-scalable variant described above.
*/
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B   = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i;   /* n = number of columns actually used in the B part */
#if defined (USE_CTABLE)
  int ierr;
#endif

  PetscFunctionBegin;
#if defined (USE_CTABLE)
  /* aij->n/5 is just an initial table-size heuristic -- the table grows as needed */
  ierr = TableCreate(aij->n/5,&aij->colmap); CHKERRQ(ierr);
  for ( i=0; i<n; i++ ){
    /* both key and value are stored +1 so that 0 stays the "absent" marker */
    ierr = TableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

/* Number of extra nonzero slots allocated per row when a row overflows */
#define CHUNKSIZE   15

/*
   MatSetValues_SeqAIJ_A_Private - Inserts/adds one value (row,col) into the
   diagonal block.  Expanded as a macro (not a call) for speed inside the
   inner loop of MatSetValues_MPIAIJ; it relies on the locals rp,ap,rmax,
   nrow,_i,ii,N,col1,low,high,t,nonew,shift and the a*-aliases declared there.
   If the row is full it reallocates the whole matrix storage in one malloc
   (CHUNKSIZE extra slots for this row) and updates the aliases in place.
   NOTE(review): the binary search below narrows low/high using `col`
   (unshifted) but the following scan restarts at _i=0 and compares `col1`;
   the search result appears unused -- confirm before "optimizing".
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for ( _i=0; _i<nrow; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space: one allocation holds a, j and i arrays */ \
      len   = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
      new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j = (int *) (new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots; rows after `row` shift by CHUNKSIZE */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
      for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                                                           len*sizeof(int)); \
      PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
      PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                                                        len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
 \
      PetscFree(a->a); \
      if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = 1; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

/*
   MatSetValues_SeqAIJ_B_Private - Identical to the A variant above but
   operating on the off-diagonal block (b*-aliases, label b_noinsert).
   Kept as a textual twin; any fix applied to one macro must be mirrored
   in the other.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for ( _i=0; _i<nrow; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space: one allocation holds a, j and i arrays */ \
      len   = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
      new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j = (int *) (new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots; rows after `row` shift by CHUNKSIZE */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
      for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                                                           len*sizeof(int)); \
      PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
      PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                                                        len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
 \
      PetscFree(b->a); \
      if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = 1; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}

#undef __FUNC__  
#define __FUNC__ "MatSetValues_MPIAIJ"
/*
   MatSetValues_MPIAIJ - Inserts or adds an m x n logically-dense block of
   values.  Locally owned rows are routed to the A (diagonal) or B
   (off-diagonal) block via the macros above; rows owned by other processes
   are stashed for communication during assembly (unless donotstash is set).
   Negative row/column indices are silently skipped.
*/
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
  int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
  Scalar     *aa = a->a;

  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
  int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
  Scalar     *ba = b->a;

  int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
  int        nonew = a->nonew,shift = a->indexshift;
  Scalar     *ap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;       /* negative rows are ignored by convention */
#if defined(USE_PETSC_BOPT_g)
    if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* row is owned locally */
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          /* column falls in the diagonal block */
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
        else if (in[j] < 0) continue;
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
#endif
        else {
          /* off-diagonal block: translate global column to compact local index */
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (USE_CTABLE)
            ierr = TableFind(aij->colmap,in[j]+1,&col); CHKERRQ(ierr);
            col--;   /* colmap stores local index + 1; 0 means "not present" */
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              /* new off-diagonal column: revert to unassembled (global) form */
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ *) B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* row owned elsewhere: stash for delivery during assembly */
      if (roworiented && !aij->donotstash) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
      } else {
        if (!aij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetValues_MPIAIJ"
/*
   MatGetValues_MPIAIJ - Retrieves an m x n block of values into v (row
   oriented).  Only locally owned rows are supported; off-diagonal columns
   not stored locally come back as 0.0.
*/
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* diagonal block */
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        } else {
          /* off-diagonal block: map global column to local compact index */
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (USE_CTABLE)
          ierr = TableFind(aij->colmap,idxn[j]+1,&col); CHKERRQ(ierr);
          col --;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* the garray cross-check guards against stale colmap entries */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatAssemblyBegin_MPIAIJ"
/*
   MatAssemblyBegin_MPIAIJ - Starts assembly: verifies that all processes
   used a consistent InsertMode (collective Allreduce) and begins scattering
   the stashed off-process values to their owners.  No-op if donotstash.
*/
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  int         ierr;
  InsertMode  addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  /* BOR of the two distinct mode values detects a mix across processes */
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = StashScatterBegin_Private(&aij->stash,aij->rowners); CHKERRQ(ierr);
  /* Free cache space */
  PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIAIJ"
/*
   MatAssemblyEnd_MPIAIJ - Completes assembly: drains the stash messages and
   applies them via MatSetValues_MPIAIJ, assembles the A block, handles
   collective disassembly (needed when any process gained a new off-diagonal
   nonzero), sets up the scatter for MatMult on first final assembly, and
   finally assembles the B block.
*/
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = StashScatterGetMesg_Private(&aij->stash,&n,&row,&col,&val,&flg); CHKERRQ(ierr);
      if (!flg) break;   /* no more incoming stash messages */

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;   /* NOTE(review): equals j-i when j==n; branch looks redundant */
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv); CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = StashScatterEnd_Private(&aij->stash); CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must 
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*) aij->B->data)->nonew) {
    /* MPI_PROD of was_assembled flags: 0 on any process forces all to disassemble */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  /* cached row-extraction workspace is stale after assembly */
  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatZeroEntries_MPIAIJ"
/* MatZeroEntries_MPIAIJ - Zeros all stored entries of both local blocks. */
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatZeroRows_MPIAIJ"
/*
   MatZeroRows_MPIAIJ - Zeros the (globally numbered) rows in `is`, optionally
   setting the diagonal entry to *diag.  Because callers may list rows owned
   by other processes, the routine first routes each requested row to its
   owner with point-to-point MPI (counts via two Allreduces, then
   Isend/Irecv/Waitany), then zeros the local rows of both blocks.
*/
int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    /* linear ownership search over the rowners range table */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];   /* how many messages this process will receive */
  ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax   = work[rank];   /* the longest message destined for this process */
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to 
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  /* starts[] was consumed by the bucketing above; rebuild it for the sends */
  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  base = owners[rank];   /* first global row owned by this process */

  /*  wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;   /* convert to local row numbers */
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use seperate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

        Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    /* square diagonal block: the seq routine can set the diagonal in place */
    ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular: zero first, then insert diagonal entries one by one */
    ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for ( i = 0; i < slen; i++ ) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp); CHKERRQ(ierr);
  PetscFree(lrows);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatMult_MPIAIJ"
/*
   MatMult_MPIAIJ - yy = A*xx.  Overlaps communication and computation:
   the scatter of ghost values into a->lvec is started, the diagonal block
   is applied, then the scatter is completed and the off-diagonal block is
   applied as a multiply-add.
*/
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr);
  if (nt != a->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatMultAdd_MPIAIJ"
/* MatMultAdd_MPIAIJ - zz = A*xx + yy, same overlap scheme as MatMult. */
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatMultTrans_MPIAIJ"
/*
   MatMultTrans_MPIAIJ - yy = A^T*xx.  The off-diagonal contribution is
   computed into lvec first and sent back (reverse scatter with ADD_VALUES)
   while the local transpose multiply runs.
*/
int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatMultTransAdd_MPIAIJ"
/* MatMultTransAdd_MPIAIJ - zz = A^T*xx + yy, same scheme as MatMultTrans. */
int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__  
#define __FUNC__ "MatGetDiagonal_MPIAIJ"
/* MatGetDiagonal_MPIAIJ - Extracts the diagonal into v (square matrices only). */
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatScale_MPIAIJ"
/* MatScale_MPIAIJ - Scales the whole matrix by *aa (both local blocks). */
int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
"MatDestroy_MPIAIJ" 673 int MatDestroy_MPIAIJ(Mat mat) 674 { 675 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 676 int ierr; 677 678 PetscFunctionBegin; 679 if (--mat->refct > 0) PetscFunctionReturn(0); 680 681 if (mat->mapping) { 682 ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 683 } 684 if (mat->bmapping) { 685 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr); 686 } 687 if (mat->rmap) { 688 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 689 } 690 if (mat->cmap) { 691 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 692 } 693 #if defined(USE_PETSC_LOG) 694 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N); 695 #endif 696 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 697 PetscFree(aij->rowners); 698 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 699 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 700 #if defined (USE_CTABLE) 701 if (aij->colmap) TableDelete(aij->colmap); 702 #else 703 if (aij->colmap) PetscFree(aij->colmap); 704 #endif 705 if (aij->garray) PetscFree(aij->garray); 706 if (aij->lvec) VecDestroy(aij->lvec); 707 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 708 if (aij->rowvalues) PetscFree(aij->rowvalues); 709 PetscFree(aij); 710 PLogObjectDestroy(mat); 711 PetscHeaderDestroy(mat); 712 PetscFunctionReturn(0); 713 } 714 715 #undef __FUNC__ 716 #define __FUNC__ "MatView_MPIAIJ_Binary" 717 int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 718 { 719 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 720 int ierr; 721 722 PetscFunctionBegin; 723 if (aij->size == 1) { 724 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 725 } 726 else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNC__ 731 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket" 732 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 733 { 734 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 735 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 736 int ierr, format,shift = C->indexshift,rank 
= aij->rank ; 737 int size = aij->size; 738 FILE *fd; 739 ViewerType vtype; 740 741 PetscFunctionBegin; 742 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 743 if (PetscTypeCompare(vtype,ASCII_VIEWER)) { 744 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 745 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 746 MatInfo info; 747 int flg; 748 MPI_Comm_rank(mat->comm,&rank); 749 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 750 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 751 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 752 PetscSequentialPhaseBegin(mat->comm,1); 753 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 754 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 755 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 756 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 757 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 758 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 759 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 760 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 761 fflush(fd); 762 PetscSequentialPhaseEnd(mat->comm,1); 763 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 764 PetscFunctionReturn(0); 765 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 766 PetscFunctionReturn(0); 767 } 768 } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) { 769 Draw draw; 770 PetscTruth isnull; 771 ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr); 772 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 773 } 774 775 if (size == 1) { 776 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 777 } else { 778 /* assemble the entire matrix onto first processor. 
*/ 779 Mat A; 780 Mat_SeqAIJ *Aloc; 781 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 782 Scalar *a; 783 784 if (!rank) { 785 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 786 } else { 787 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 788 } 789 PLogObjectParent(mat,A); 790 791 /* copy over the A part */ 792 Aloc = (Mat_SeqAIJ*) aij->A->data; 793 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 794 row = aij->rstart; 795 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 796 for ( i=0; i<m; i++ ) { 797 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 798 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 799 } 800 aj = Aloc->j; 801 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 802 803 /* copy over the B part */ 804 Aloc = (Mat_SeqAIJ*) aij->B->data; 805 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 806 row = aij->rstart; 807 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 808 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 809 for ( i=0; i<m; i++ ) { 810 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 811 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 812 } 813 PetscFree(ct); 814 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 815 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 816 /* 817 Everyone has to call to draw the matrix since the graphics waits are 818 synchronized across all processors that share the Draw object 819 */ 820 if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) { 821 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 822 } 823 ierr = MatDestroy(A); CHKERRQ(ierr); 824 } 825 PetscFunctionReturn(0); 826 } 827 828 #undef __FUNC__ 829 #define __FUNC__ "MatView_MPIAIJ" 830 int MatView_MPIAIJ(Mat mat,Viewer viewer) 831 { 832 int ierr; 833 ViewerType vtype; 834 835 
/* (continuation of MatView_MPIAIJ: dispatch on the viewer type) */
  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
      PetscTypeCompare(vtype,SOCKET_VIEWER)) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr);
  } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
    ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ(1,1,"Viewer type not supported by PETSc object");
  }
  PetscFunctionReturn(0);
}

/*
    This has to provide several versions.

     2) a) use only local smoothing updating outer values only once.
        b) local smoothing updating outer values each inner iteration
     3) color updating out values betwen colors.
*/
#undef __FUNC__  
#define __FUNC__ "MatRelax_MPIAIJ"
/*
   MatRelax_MPIAIJ - SOR/relaxation sweeps.  Only the SOR_LOCAL_* variants
   are visible here: ghost values are scattered into mat->lvec once per call
   and the sweeps use the diagonal block A plus the off-diagonal block B
   (through ls) row by row.  With SOR_ZERO_INITIAL_GUESS the work is
   delegated entirely to the sequential relax of the diagonal block.
   (Function continues past the end of this file excerpt.)
*/
int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                    double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     *b,*x,*xs,*ls,d,*v,sum;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;

  PetscFunctionBegin;
  ierr = VecGetArray(xx,&x); CHKERRQ(ierr);
  ierr = VecGetArray(bb,&b); CHKERRQ(ierr);
  ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* zero guess: a plain local relax on the diagonal block suffices */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
      ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr =
VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 884 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 885 while (its--) { 886 /* go down through the rows */ 887 for ( i=0; i<m; i++ ) { 888 n = A->i[i+1] - A->i[i]; 889 PLogFlops(4*n+3); 890 idx = A->j + A->i[i] + shift; 891 v = A->a + A->i[i] + shift; 892 sum = b[i]; 893 SPARSEDENSEMDOT(sum,xs,v,idx,n); 894 d = fshift + A->a[diag[i]+shift]; 895 n = B->i[i+1] - B->i[i]; 896 idx = B->j + B->i[i] + shift; 897 v = B->a + B->i[i] + shift; 898 SPARSEDENSEMDOT(sum,ls,v,idx,n); 899 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 900 } 901 /* come up through the rows */ 902 for ( i=m-1; i>-1; i-- ) { 903 n = A->i[i+1] - A->i[i]; 904 PLogFlops(4*n+3) 905 idx = A->j + A->i[i] + shift; 906 v = A->a + A->i[i] + shift; 907 sum = b[i]; 908 SPARSEDENSEMDOT(sum,xs,v,idx,n); 909 d = fshift + A->a[diag[i]+shift]; 910 n = B->i[i+1] - B->i[i]; 911 idx = B->j + B->i[i] + shift; 912 v = B->a + B->i[i] + shift; 913 SPARSEDENSEMDOT(sum,ls,v,idx,n); 914 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 915 } 916 } 917 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 918 if (flag & SOR_ZERO_INITIAL_GUESS) { 919 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 920 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 921 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 922 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 923 PetscFunctionReturn(0); 924 } 925 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 926 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 927 while (its--) { 928 for ( i=0; i<m; i++ ) { 929 n = A->i[i+1] - A->i[i]; 930 PLogFlops(4*n+3); 931 idx = A->j + A->i[i] + shift; 932 v = A->a + A->i[i] + shift; 933 sum = b[i]; 934 SPARSEDENSEMDOT(sum,xs,v,idx,n); 935 d = fshift + A->a[diag[i]+shift]; 936 n = B->i[i+1] - B->i[i]; 937 idx = B->j + B->i[i] + shift; 938 v = B->a + B->i[i] + shift; 939 SPARSEDENSEMDOT(sum,ls,v,idx,n); 940 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 941 } 942 } 943 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 944 if (flag & SOR_ZERO_INITIAL_GUESS) { 945 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 946 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 947 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 948 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 949 PetscFunctionReturn(0); 950 } 951 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 952 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 953 while (its--) { 954 for ( i=m-1; i>-1; i-- ) { 955 n = A->i[i+1] - A->i[i]; 956 PLogFlops(4*n+3); 957 idx = A->j + A->i[i] + shift; 958 v = A->a + A->i[i] + shift; 959 sum = b[i]; 960 SPARSEDENSEMDOT(sum,xs,v,idx,n); 961 d = fshift + A->a[diag[i]+shift]; 962 n = B->i[i+1] - B->i[i]; 963 idx = B->j + B->i[i] + shift; 964 v = B->a + B->i[i] + shift; 965 SPARSEDENSEMDOT(sum,ls,v,idx,n); 966 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 967 } 968 } 969 } else { 970 SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 971 } 972 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 973 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 974 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 975 PetscFunctionReturn(0); 976 } 977 978 #undef __FUNC__ 979 #define __FUNC__ "MatGetInfo_MPIAIJ" 980 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 981 { 982 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 983 Mat A = mat->A, B = mat->B; 984 int ierr; 985 double isend[5], irecv[5]; 986 987 PetscFunctionBegin; 988 info->block_size = 1.0; 989 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 990 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 991 isend[3] = info->memory; isend[4] = info->mallocs; 992 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 993 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 994 isend[3] += info->memory; isend[4] += info->mallocs; 995 if (flag == MAT_LOCAL) { 996 info->nz_used = isend[0]; 997 info->nz_allocated = isend[1]; 998 info->nz_unneeded = isend[2]; 999 info->memory = isend[3]; 1000 info->mallocs = isend[4]; 1001 } else if (flag == MAT_GLOBAL_MAX) { 1002 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1003 info->nz_used = irecv[0]; 1004 info->nz_allocated = irecv[1]; 1005 info->nz_unneeded = irecv[2]; 1006 info->memory = irecv[3]; 1007 info->mallocs = irecv[4]; 1008 } else if (flag == MAT_GLOBAL_SUM) { 1009 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1010 info->nz_used = irecv[0]; 1011 info->nz_allocated = irecv[1]; 1012 info->nz_unneeded = irecv[2]; 1013 info->memory = irecv[3]; 1014 info->mallocs = irecv[4]; 1015 } 1016 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1017 info->fill_ratio_needed = 0; 1018 info->factor_mallocs = 0; 1019 
  info->rows_global       = (double)mat->M;
  info->columns_global    = (double)mat->N;
  info->rows_local        = (double)mat->m;
  /* NOTE(review): rows_local uses the local row count m, but columns_local is
     set to the GLOBAL column count N rather than mat->n -- confirm intended
     (the local rows do span all N columns across the A and B blocks) */
  info->columns_local     = (double)mat->N;

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIAIJ"
/*
   Applies an option to the parallel matrix.  Most options are simply
   forwarded to both the diagonal (A) and off-diagonal (B) sequential blocks;
   a few are recorded locally or are unsupported in parallel.
*/
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_UNSORTED ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
      op == MAT_NEW_NONZERO_LOCATION_ERR) {
        /* options handled entirely by the sequential blocks: pass through */
        ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
        ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
  } else if (op == MAT_ROW_ORIENTED) {
        /* record the orientation locally (used by MatSetValues) and forward */
        a->roworiented = 1;
        ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
        ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS)
    PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    /* MatSetValues() will silently drop entries destined for other processors */
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS){
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIAIJ"
/* Returns the GLOBAL dimensions M x N of the parallel matrix. */
int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIAIJ"
/* Returns the LOCAL dimensions m x n owned by this processor. */
int
MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->m;
  if (n) *n = mat->n;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
/* Returns the half-open range [rstart,rend) of global rows owned locally. */
int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->rstart; *n = mat->rend;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIAIJ"
/*
   Returns one locally owned row with GLOBAL column indices, by merging the
   row of the diagonal block A with the row of the off-diagonal block B
   (whose compressed column indices are mapped back to global ones through
   garray).  Only one row may be "active" at a time; MatRestoreRow_MPIAIJ()
   ends the access.  Work space is allocated lazily on first use and reused.
*/
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int     max = 1,m = mat->m,tmp;
    /* a merged row is at most |A row| + |B row| entries long */
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single buffer: max Scalars for the values, then max ints for the indices */
    mat->rowvalues = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows")
  lrow = row - rstart;

  /* only request the arrays (values/columns) that the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries whose global column lies left of the diagonal block come first */
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* then all of A, then the remaining (right-of-diagonal) B entries */
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying the values */
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        /* A columns are local: offset by cstart; B columns map through garray */
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      /* empty row */
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIAIJ"
/* Ends the row access begun by MatGetRow_MPIAIJ(); work space is retained. */
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIAIJ"
/*
   Computes the Frobenius, one, or infinity norm of the parallel matrix by
   combining the local contributions of the A and B blocks with an MPI
   reduction.  The two norm is not supported.
*/
int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* uniprocessor: delegate to the sequential matrix */
    ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of squared moduli of all local entries, then a global sum */
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      /* non-scalable: builds a global-length (N) vector of column sums */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ +
shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        /* garray maps a compressed B column index back to its global column */
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        /* absolute row sum over both the A and B parts of local row j */
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIAIJ"
/*
   Forms the transpose by feeding each local row of A and B, with its column
   indices converted to global numbering, into a new n x m parallel matrix as
   a COLUMN (MatSetValues with one column and many rows).  With
   matout == PETSC_NULL the result replaces A "in place" (square only);
   note the A-part column indices are temporarily shifted to global numbering
   and then restored.
*/
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat        B;
  Scalar     *array;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
  }

  /* transpose has the local column/row sizes (and global N,M) swapped */
  ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* temporarily convert A's local column indices to global numbering */
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  /* undo the shift so a->A is left unmodified */
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  ct   = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  /* B stores compressed column indices; expand them through garray */
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps       *Abops;
    struct _MatOps *Aops;

    /* This isn't really an in-place transpose .... but free data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
#if defined (USE_CTABLE)
    if (a->colmap) TableDelete(a->colmap);
#else
    if (a->colmap) PetscFree(a->colmap);
#endif
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);

    /*
        This is horrible, horrible code. We need to keep the
      A pointers for the bops and ops but copy everything
      else from B.
    */
    Abops = A->bops;
    Aops  = A->ops;
    PetscMemcpy(A,B,sizeof(struct _p_Mat));
    A->bops = Abops;
    A->ops  = Aops;
    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIAIJ"
/*
   Computes ll * mat * rr.  The left scaling is purely local; the right
   scaling of the off-diagonal block needs the ghost values of rr, so the
   scatter into lvec is overlapped with the local scaling work.
*/
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat        a = aij->A, b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}


#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIAIJ"
/* Prints the runtime options help (on the first process only). */
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
  int        ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIAIJ"
/* AIJ matrices always have block size 1. */
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIAIJ"
/* Resets the factored state; only the diagonal block carries factorization. */
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatEqual_MPIAIJ"
/*
   Two parallel AIJ matrices are equal iff, on every processor, both the
   diagonal and off-diagonal blocks are equal (combined with a global AND).
*/
int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
  Mat        a, b, c, d;
  PetscTruth flg;
  int        ierr;

  PetscFunctionBegin;
  if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    /* only compare the off-diagonal parts if the diagonal parts matched */
    ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
  }
  /* NOTE(review): reduces a PetscTruth buffer as MPI_INT -- relies on the
     enum being int-sized; confirm on all supported platforms */
  ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatCopy_MPIAIJ"
/*
   Copies A into B; with SAME_NONZERO_PATTERN and matching type the copy is
   done block-by-block, otherwise it falls back to the generic MatCopy_Basic().
*/
int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **);
extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *);

/* -------------------------------------------------------------------*/
/* Positional function table for MATMPIAIJ: entries must remain in the slot
   order defined by struct _MatOps; 0 marks an unsupported operation. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
       MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,
       MatMultTransAdd_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
       MatZeroRows_MPIAIJ,
       0,
       0,
       0,
       0,
       MatGetSize_MPIAIJ,
       MatGetLocalSize_MPIAIJ,
       MatGetOwnershipRange_MPIAIJ,
       0,
       0,
       0,
       0,
       MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
       MatGetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
       MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       0,
       0,
       MatGetSubMatrix_MPIAIJ,
       0,
       0,
       MatGetMaps_Petsc};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatStoreValues_MPIAIJ"
/* Saves the numerical values of both blocks (for later MatRetrieveValues). */
int MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatRetrieveValues_MPIAIJ"
/* Restores the numerical values previously saved with MatStoreValues. */
int MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#include "pc.h"
EXTERN_C_BEGIN
extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
EXTERN_C_END

#undef __FUNC__
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
       (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor than it must be used on all processors that share the object for
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
.  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!
-  -mat_mpi - use the parallel matrix data structures even on one processor
        (defaults to using SeqAIJ format on one processor)


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr, i,size,flag1 = 0, flag2 = 0;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1); CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr);
  if (!flag1 && !flag2 && size == 1) {
    /* uniprocessor and no forcing option: build a plain SeqAIJ matrix */
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
  B->ops->destroy    = MatDestroy_MPIAIJ;
  B->ops->view       = MatView_MPIAIJ;
  B->factor          = 0;
  B->assembled       = PETSC_FALSE;
  B->mapping         = 0;

  B->insertmode      = NOT_SET_VALUES;
  b->size            = size;
  MPI_Comm_rank(comm,&b->rank);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }

  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  /* gather each processor's row count, then prefix-sum into ownership offsets */
  ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* diagonal block is m x n; off-diagonal block is m x N (columns compressed later) */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashCreate_Private(comm,1,1,&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec      = 0;
  b->Mvctx     = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  *A = B;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIAIJ"
/*
   Duplicates a parallel AIJ matrix: copies the layout/ownership tables,
   colmap/garray, communication objects (lvec, Mvctx) and duplicates the
   A and B blocks (copying values only if cpvalues requests it).
*/
int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  PetscFunctionBegin;
  *newmat       = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
  mat->ops->destroy = MatDestroy_MPIAIJ;
  mat->ops->view    = MatView_MPIAIJ;
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a->m = mat->m   = oldmat->m;
  a->n = mat->n   = oldmat->n;
  a->M = mat->M   = oldmat->M;
  a->N = mat->N   = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  /* single allocation holds both rowners and cowners, as in MatCreateMPIAIJ */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashCreate_Private(matin->comm,1,1,&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (USE_CTABLE)
    ierr = TableCreateCopy(oldmat->colmap,&a->colmap); CHKERRQ(ierr);
#else
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    /* garray length equals the (compressed) column count of the B block */
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIAIJ"
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          i, nz, ierr, j,rstart, rend, fd;
1923 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1924 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1925 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1926 1927 PetscFunctionBegin; 1928 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1929 if (!rank) { 1930 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1931 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 1932 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 1933 if (header[3] < 0) { 1934 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ"); 1935 } 1936 } 1937 1938 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1939 M = header[1]; N = header[2]; 1940 /* determine ownership of all rows */ 1941 m = M/size + ((M % size) > rank); 1942 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1943 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1944 rowners[0] = 0; 1945 for ( i=2; i<=size; i++ ) { 1946 rowners[i] += rowners[i-1]; 1947 } 1948 rstart = rowners[rank]; 1949 rend = rowners[rank+1]; 1950 1951 /* distribute row lengths to all processors */ 1952 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1953 offlens = ourlens + (rend-rstart); 1954 if (!rank) { 1955 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1956 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 1957 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1958 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1959 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1960 PetscFree(sndcounts); 1961 } else { 1962 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr); 1963 } 1964 1965 if (!rank) { 1966 /* calculate the number of nonzeros on 
each processor */ 1967 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1968 PetscMemzero(procsnz,size*sizeof(int)); 1969 for ( i=0; i<size; i++ ) { 1970 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1971 procsnz[i] += rowlengths[j]; 1972 } 1973 } 1974 PetscFree(rowlengths); 1975 1976 /* determine max buffer needed and allocate it */ 1977 maxnz = 0; 1978 for ( i=0; i<size; i++ ) { 1979 maxnz = PetscMax(maxnz,procsnz[i]); 1980 } 1981 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1982 1983 /* read in my part of the matrix column indices */ 1984 nz = procsnz[0]; 1985 mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1986 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 1987 1988 /* read in every one elses and ship off */ 1989 for ( i=1; i<size; i++ ) { 1990 nz = procsnz[i]; 1991 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 1992 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1993 } 1994 PetscFree(cols); 1995 } else { 1996 /* determine buffer space needed for message */ 1997 nz = 0; 1998 for ( i=0; i<m; i++ ) { 1999 nz += ourlens[i]; 2000 } 2001 mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 2002 2003 /* receive message of column indices*/ 2004 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2005 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2006 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2007 } 2008 2009 /* determine column ownership if matrix is not square */ 2010 if (N != M) { 2011 n = N/size + ((N % size) > rank); 2012 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2013 cstart = cend - n; 2014 } else { 2015 cstart = rstart; 2016 cend = rend; 2017 n = cend - cstart; 2018 } 2019 2020 /* loop over local rows, determining number of off diagonal entries */ 2021 PetscMemzero(offlens,m*sizeof(int)); 2022 jj = 0; 2023 for ( i=0; i<m; i++ ) { 2024 for ( j=0; j<ourlens[i]; 
j++ ) { 2025 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2026 jj++; 2027 } 2028 } 2029 2030 /* create our matrix */ 2031 for ( i=0; i<m; i++ ) { 2032 ourlens[i] -= offlens[i]; 2033 } 2034 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 2035 A = *newmat; 2036 ierr = MatSetOption(A,MAT_COLUMNS_SORTED); CHKERRQ(ierr); 2037 for ( i=0; i<m; i++ ) { 2038 ourlens[i] += offlens[i]; 2039 } 2040 2041 if (!rank) { 2042 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 2043 2044 /* read in my part of the matrix numerical values */ 2045 nz = procsnz[0]; 2046 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2047 2048 /* insert into matrix */ 2049 jj = rstart; 2050 smycols = mycols; 2051 svals = vals; 2052 for ( i=0; i<m; i++ ) { 2053 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2054 smycols += ourlens[i]; 2055 svals += ourlens[i]; 2056 jj++; 2057 } 2058 2059 /* read in other processors and ship out */ 2060 for ( i=1; i<size; i++ ) { 2061 nz = procsnz[i]; 2062 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2063 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2064 } 2065 PetscFree(procsnz); 2066 } else { 2067 /* receive numeric values */ 2068 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 2069 2070 /* receive message of values*/ 2071 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2072 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2073 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2074 2075 /* insert into matrix */ 2076 jj = rstart; 2077 smycols = mycols; 2078 svals = vals; 2079 for ( i=0; i<m; i++ ) { 2080 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2081 smycols += ourlens[i]; 2082 svals += ourlens[i]; 2083 jj++; 2084 } 2085 } 2086 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); 
PetscFree(rowners); 2087 2088 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2089 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2090 PetscFunctionReturn(0); 2091 } 2092 2093 #undef __FUNC__ 2094 #define __FUNC__ "MatGetSubMatrix_MPIAIJ" 2095 /* 2096 Not great since it makes two copies of the submatrix, first an SeqAIJ 2097 in local and then by concatenating the local matrices the end result. 2098 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2099 */ 2100 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2101 { 2102 int ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2103 Mat *local,M, Mreuse; 2104 Scalar *vwork,*aa; 2105 MPI_Comm comm = mat->comm; 2106 Mat_SeqAIJ *aij; 2107 int *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend; 2108 2109 PetscFunctionBegin; 2110 MPI_Comm_rank(comm,&rank); 2111 MPI_Comm_size(comm,&size); 2112 2113 if (call == MAT_REUSE_MATRIX) { 2114 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2115 if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse"); 2116 local = &Mreuse; 2117 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2118 } else { 2119 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2120 Mreuse = *local; 2121 PetscFree(local); 2122 } 2123 2124 /* 2125 m - number of local rows 2126 n - number of columns (same on all processors) 2127 rstart - first row in new global matrix generated 2128 */ 2129 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2130 if (call == MAT_INITIAL_MATRIX) { 2131 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2132 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2133 ii = aij->i; 2134 jj = aij->j; 2135 2136 /* 2137 Determine the number of non-zeros in the diagonal and off-diagonal 2138 portions of the matrix in order to do correct preallocation 2139 */ 
2140 2141 /* first get start and end of "diagonal" columns */ 2142 if (csize == PETSC_DECIDE) { 2143 nlocal = n/size + ((n % size) > rank); 2144 } else { 2145 nlocal = csize; 2146 } 2147 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2148 rstart = rend - nlocal; 2149 if (rank == size - 1 && rend != n) { 2150 SETERRQ(1,1,"Local column sizes do not add up to total number of columns"); 2151 } 2152 2153 /* next, compute all the lengths */ 2154 dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens); 2155 olens = dlens + m; 2156 for ( i=0; i<m; i++ ) { 2157 jend = ii[i+1] - ii[i]; 2158 olen = 0; 2159 dlen = 0; 2160 for ( j=0; j<jend; j++ ) { 2161 if ( *jj < rstart || *jj >= rend) olen++; 2162 else dlen++; 2163 jj++; 2164 } 2165 olens[i] = olen; 2166 dlens[i] = dlen; 2167 } 2168 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2169 PetscFree(dlens); 2170 } else { 2171 int ml,nl; 2172 2173 M = *newmat; 2174 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2175 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request"); 2176 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2177 /* 2178 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2179 rather than the slower MatSetValues(). 
2180 */ 2181 M->was_assembled = PETSC_TRUE; 2182 M->assembled = PETSC_FALSE; 2183 } 2184 ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr); 2185 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2186 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2187 ii = aij->i; 2188 jj = aij->j; 2189 aa = aij->a; 2190 for (i=0; i<m; i++) { 2191 row = rstart + i; 2192 nz = ii[i+1] - ii[i]; 2193 cwork = jj; jj += nz; 2194 vwork = aa; aa += nz; 2195 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr); 2196 } 2197 2198 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2199 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2200 *newmat = M; 2201 2202 /* save submatrix used in processor for next request */ 2203 if (call == MAT_INITIAL_MATRIX) { 2204 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2205 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2206 } 2207 2208 PetscFunctionReturn(0); 2209 } 2210