1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpiaij.c,v 1.287 1999/03/11 22:30:32 balay Exp balay $"; 3 #endif 4 5 #include "src/mat/impls/aij/mpi/mpiaij.h" 6 #include "src/vec/vecimpl.h" 7 #include "src/inline/spops.h" 8 9 extern int MatSetUpMultiply_MPIAIJ(Mat); 10 extern int DisAssemble_MPIAIJ(Mat); 11 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 12 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 13 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 14 extern int MatPrintHelp_SeqAIJ(Mat); 15 16 /* local utility routine that creates a mapping from the global column 17 number to the local number in the off-diagonal part of the local 18 storage of the matrix. This is done in a non scable way since the 19 length of colmap equals the global matrix length. 20 */ 21 #undef __FUNC__ 22 #define __FUNC__ "CreateColmap_MPIAIJ_Private" 23 int CreateColmap_MPIAIJ_Private(Mat mat) 24 { 25 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 26 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 27 int n = B->n,i; 28 #if defined (USE_CTABLE) 29 int ierr; 30 #endif 31 32 PetscFunctionBegin; 33 #if defined (USE_CTABLE) 34 ierr = TableCreate(aij->n/5,&aij->colmap); CHKERRQ(ierr); 35 for ( i=0; i<n; i++ ){ 36 ierr = TableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 37 } 38 #else 39 aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 40 PLogObjectMemory(mat,aij->N*sizeof(int)); 41 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 42 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 43 #endif 44 PetscFunctionReturn(0); 45 } 46 47 #define CHUNKSIZE 15 48 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 49 { \ 50 \ 51 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 52 rmax = aimax[row]; nrow = ailen[row]; \ 53 col1 = col - shift; \ 54 \ 55 low = 0; high = nrow; \ 56 while (high-low > 5) { \ 57 t = (low+high)/2; \ 58 if (rp[t] > col) high = t; \ 59 else low = t; \ 60 } \ 61 for ( _i=0; 
_i<nrow; _i++ ) { \ 62 if (rp[_i] > col1) break; \ 63 if (rp[_i] == col1) { \ 64 if (addv == ADD_VALUES) ap[_i] += value; \ 65 else ap[_i] = value; \ 66 goto a_noinsert; \ 67 } \ 68 } \ 69 if (nonew == 1) goto a_noinsert; \ 70 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 71 if (nrow >= rmax) { \ 72 /* there is no extra room in row, therefore enlarge */ \ 73 int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 74 Scalar *new_a; \ 75 \ 76 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 77 \ 78 /* malloc new storage space */ \ 79 len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 80 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 81 new_j = (int *) (new_a + new_nz); \ 82 new_i = new_j + new_nz; \ 83 \ 84 /* copy over old data into new slots */ \ 85 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 86 for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 87 PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 88 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 89 PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 90 len*sizeof(int)); \ 91 PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 92 PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 93 len*sizeof(Scalar)); \ 94 /* free up old matrix storage */ \ 95 \ 96 PetscFree(a->a); \ 97 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 98 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 99 a->singlemalloc = 1; \ 100 \ 101 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 102 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 103 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 104 a->maxnz += CHUNKSIZE; \ 105 a->reallocs++; \ 106 } \ 107 N = nrow++ - 1; a->nz++; \ 108 /* shift up all the later entries in this row */ \ 109 for ( ii=N; ii>=_i; ii-- ) { \ 110 rp[ii+1] = rp[ii]; \ 111 
ap[ii+1] = ap[ii]; \ 112 } \ 113 rp[_i] = col1; \ 114 ap[_i] = value; \ 115 a_noinsert: ; \ 116 ailen[row] = nrow; \ 117 } 118 119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 120 { \ 121 \ 122 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 123 rmax = bimax[row]; nrow = bilen[row]; \ 124 col1 = col - shift; \ 125 \ 126 low = 0; high = nrow; \ 127 while (high-low > 5) { \ 128 t = (low+high)/2; \ 129 if (rp[t] > col) high = t; \ 130 else low = t; \ 131 } \ 132 for ( _i=0; _i<nrow; _i++ ) { \ 133 if (rp[_i] > col1) break; \ 134 if (rp[_i] == col1) { \ 135 if (addv == ADD_VALUES) ap[_i] += value; \ 136 else ap[_i] = value; \ 137 goto b_noinsert; \ 138 } \ 139 } \ 140 if (nonew == 1) goto b_noinsert; \ 141 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 142 if (nrow >= rmax) { \ 143 /* there is no extra room in row, therefore enlarge */ \ 144 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 145 Scalar *new_a; \ 146 \ 147 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 148 \ 149 /* malloc new storage space */ \ 150 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 151 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 152 new_j = (int *) (new_a + new_nz); \ 153 new_i = new_j + new_nz; \ 154 \ 155 /* copy over old data into new slots */ \ 156 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 157 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 158 PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 159 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 160 PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 161 len*sizeof(int)); \ 162 PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 163 PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 164 len*sizeof(Scalar)); \ 165 /* free up old matrix storage */ \ 166 \ 167 PetscFree(b->a); \ 168 if 
(!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 169 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 170 b->singlemalloc = 1; \ 171 \ 172 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 173 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 174 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 175 b->maxnz += CHUNKSIZE; \ 176 b->reallocs++; \ 177 } \ 178 N = nrow++ - 1; b->nz++; \ 179 /* shift up all the later entries in this row */ \ 180 for ( ii=N; ii>=_i; ii-- ) { \ 181 rp[ii+1] = rp[ii]; \ 182 ap[ii+1] = ap[ii]; \ 183 } \ 184 rp[_i] = col1; \ 185 ap[_i] = value; \ 186 b_noinsert: ; \ 187 bilen[row] = nrow; \ 188 } 189 190 #undef __FUNC__ 191 #define __FUNC__ "MatSetValues_MPIAIJ" 192 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 193 { 194 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 195 Scalar value; 196 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 197 int cstart = aij->cstart, cend = aij->cend,row,col; 198 int roworiented = aij->roworiented; 199 200 /* Some Variables required in the macro */ 201 Mat A = aij->A; 202 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 203 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 204 Scalar *aa = a->a; 205 206 Mat B = aij->B; 207 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 208 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 209 Scalar *ba = b->a; 210 211 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 212 int nonew = a->nonew,shift = a->indexshift; 213 Scalar *ap; 214 215 PetscFunctionBegin; 216 for ( i=0; i<m; i++ ) { 217 if (im[i] < 0) continue; 218 #if defined(USE_PETSC_BOPT_g) 219 if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 220 #endif 221 if (im[i] >= rstart && im[i] < rend) { 222 row = im[i] - rstart; 223 for ( j=0; j<n; j++ ) { 224 if (in[j] >= cstart && in[j] < cend){ 225 col = in[j] - cstart; 226 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 227 
MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 228 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 229 } 230 else if (in[j] < 0) continue; 231 #if defined(USE_PETSC_BOPT_g) 232 else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 233 #endif 234 else { 235 if (mat->was_assembled) { 236 if (!aij->colmap) { 237 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 238 } 239 #if defined (USE_CTABLE) 240 ierr = TableFind(aij->colmap,in[j]+1,&col); CHKERRQ(ierr); 241 col--; 242 #else 243 col = aij->colmap[in[j]] - 1; 244 #endif 245 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 246 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 247 col = in[j]; 248 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 249 B = aij->B; 250 b = (Mat_SeqAIJ *) B->data; 251 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 252 ba = b->a; 253 } 254 } else col = in[j]; 255 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 256 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 257 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 258 } 259 } 260 } else { 261 if (!aij->donotstash) { 262 if (roworiented) { 263 ierr = StashValuesRoworiented_Private(&aij->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 264 } else { 265 ierr = StashValuesColumnoriented_Private(&aij->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 266 } 267 } 268 } 269 } 270 PetscFunctionReturn(0); 271 } 272 273 #undef __FUNC__ 274 #define __FUNC__ "MatGetValues_MPIAIJ" 275 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 276 { 277 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 278 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 279 int cstart = aij->cstart, cend = aij->cend,row,col; 280 281 PetscFunctionBegin; 282 for ( i=0; i<m; i++ ) { 283 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 284 if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row 
too large"); 285 if (idxm[i] >= rstart && idxm[i] < rend) { 286 row = idxm[i] - rstart; 287 for ( j=0; j<n; j++ ) { 288 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 289 if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 290 if (idxn[j] >= cstart && idxn[j] < cend){ 291 col = idxn[j] - cstart; 292 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 293 } else { 294 if (!aij->colmap) { 295 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 296 } 297 #if defined (USE_CTABLE) 298 ierr = TableFind(aij->colmap,idxn[j]+1,&col); CHKERRQ(ierr); 299 col --; 300 #else 301 col = aij->colmap[idxn[j]] - 1; 302 #endif 303 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 304 else { 305 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 306 } 307 } 308 } 309 } else { 310 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 311 } 312 } 313 PetscFunctionReturn(0); 314 } 315 316 #undef __FUNC__ 317 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 318 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 319 { 320 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 321 int ierr,nstash,reallocs; 322 InsertMode addv; 323 324 PetscFunctionBegin; 325 if (aij->donotstash) { 326 PetscFunctionReturn(0); 327 } 328 329 /* make sure all processors are either in INSERTMODE or ADDMODE */ 330 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 331 if (addv == (ADD_VALUES|INSERT_VALUES)) { 332 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 333 } 334 mat->insertmode = addv; /* in case this processor had no cache */ 335 336 ierr = StashScatterBegin_Private(&aij->stash,aij->rowners); CHKERRQ(ierr); 337 ierr = StashGetInfo_Private(&aij->stash,&nstash,&reallocs); CHKERRQ(ierr); 338 PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n", 339 nstash,reallocs); 340 PetscFunctionReturn(0); 341 } 342 343 344 
#undef __FUNC__ 345 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 346 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 347 { 348 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 349 int i,j,rstart,ncols,n,ierr,flg; 350 int *row,*col,other_disassembled; 351 Scalar *val; 352 InsertMode addv = mat->insertmode; 353 354 PetscFunctionBegin; 355 if (!aij->donotstash) { 356 while (1) { 357 ierr = StashScatterGetMesg_Private(&aij->stash,&n,&row,&col,&val,&flg); CHKERRQ(ierr); 358 if (!flg) break; 359 360 for ( i=0; i<n; ) { 361 /* Now identify the consecutive vals belonging to the same row */ 362 for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 363 if (j < n) ncols = j-i; 364 else ncols = n-i; 365 /* Now assemble all these values with a single function call */ 366 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv); CHKERRQ(ierr); 367 i = j; 368 } 369 } 370 ierr = StashScatterEnd_Private(&aij->stash); CHKERRQ(ierr); 371 } 372 373 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 374 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 375 376 /* determine if any processor has disassembled, if so we must 377 also disassemble ourselfs, in order that we may reassemble. 
*/ 378 /* 379 if nonzero structure of submatrix B cannot change then we know that 380 no processor disassembled thus we can skip this stuff 381 */ 382 if (!((Mat_SeqAIJ*) aij->B->data)->nonew) { 383 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 384 if (mat->was_assembled && !other_disassembled) { 385 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 386 } 387 } 388 389 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 390 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 391 } 392 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 393 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 394 395 if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 396 PetscFunctionReturn(0); 397 } 398 399 #undef __FUNC__ 400 #define __FUNC__ "MatZeroEntries_MPIAIJ" 401 int MatZeroEntries_MPIAIJ(Mat A) 402 { 403 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 404 int ierr; 405 406 PetscFunctionBegin; 407 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 408 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 409 PetscFunctionReturn(0); 410 } 411 412 #undef __FUNC__ 413 #define __FUNC__ "MatZeroRows_MPIAIJ" 414 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 415 { 416 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 417 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 418 int *procs,*nprocs,j,found,idx,nsends,*work,row; 419 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 420 int *rvalues,tag = A->tag,count,base,slen,n,*source; 421 int *lens,imdex,*lrows,*values,rstart=l->rstart; 422 MPI_Comm comm = A->comm; 423 MPI_Request *send_waits,*recv_waits; 424 MPI_Status recv_status,*send_status; 425 IS istmp; 426 427 PetscFunctionBegin; 428 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 429 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 430 431 /* first count number of contributors to each processor */ 432 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 433 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs 
+ size; 434 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 435 for ( i=0; i<N; i++ ) { 436 idx = rows[i]; 437 found = 0; 438 for ( j=0; j<size; j++ ) { 439 if (idx >= owners[j] && idx < owners[j+1]) { 440 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 441 } 442 } 443 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 444 } 445 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 446 447 /* inform other processors of number of messages and max length*/ 448 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 449 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 450 nrecvs = work[rank]; 451 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 452 nmax = work[rank]; 453 PetscFree(work); 454 455 /* post receives: */ 456 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 457 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 458 for ( i=0; i<nrecvs; i++ ) { 459 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 460 } 461 462 /* do sends: 463 1) starts[i] gives the starting index in svalues for stuff going to 464 the ith processor 465 */ 466 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 467 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 468 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 469 starts[0] = 0; 470 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 471 for ( i=0; i<N; i++ ) { 472 svalues[starts[owner[i]]++] = rows[i]; 473 } 474 ISRestoreIndices(is,&rows); 475 476 starts[0] = 0; 477 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 478 count = 0; 479 for ( i=0; i<size; i++ ) { 480 if (procs[i]) { 481 ierr = 
MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 482 } 483 } 484 PetscFree(starts); 485 486 base = owners[rank]; 487 488 /* wait on receives */ 489 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 490 source = lens + nrecvs; 491 count = nrecvs; slen = 0; 492 while (count) { 493 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 494 /* unpack receives into our local space */ 495 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 496 source[imdex] = recv_status.MPI_SOURCE; 497 lens[imdex] = n; 498 slen += n; 499 count--; 500 } 501 PetscFree(recv_waits); 502 503 /* move the data into the send scatter */ 504 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 505 count = 0; 506 for ( i=0; i<nrecvs; i++ ) { 507 values = rvalues + i*nmax; 508 for ( j=0; j<lens[i]; j++ ) { 509 lrows[count++] = values[j] - base; 510 } 511 } 512 PetscFree(rvalues); PetscFree(lens); 513 PetscFree(owner); PetscFree(nprocs); 514 515 /* actually zap the local rows */ 516 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 517 PLogObjectParent(A,istmp); 518 519 /* 520 Zero the required rows. If the "diagonal block" of the matrix 521 is square and the user wishes to set the diagonal we use seperate 522 code so that MatSetValues() is not called for each diagonal allocating 523 new memory, thus calling lots of mallocs and slowing things down. 
524 525 Contributed by: Mathew Knepley 526 */ 527 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 528 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 529 if (diag && (l->A->M == l->A->N)) { 530 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 531 } else if (diag) { 532 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 533 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 534 SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 535 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 536 } 537 for ( i = 0; i < slen; i++ ) { 538 row = lrows[i] + rstart; 539 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 540 } 541 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 542 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 543 } else { 544 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 545 } 546 ierr = ISDestroy(istmp); CHKERRQ(ierr); 547 PetscFree(lrows); 548 549 /* wait on sends */ 550 if (nsends) { 551 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 552 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 553 PetscFree(send_status); 554 } 555 PetscFree(send_waits); PetscFree(svalues); 556 557 PetscFunctionReturn(0); 558 } 559 560 #undef __FUNC__ 561 #define __FUNC__ "MatMult_MPIAIJ" 562 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 563 { 564 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 565 int ierr,nt; 566 567 PetscFunctionBegin; 568 ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 569 if (nt != a->n) { 570 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 571 } 572 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 573 ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 574 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 575 ierr = 
(*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 576 PetscFunctionReturn(0); 577 } 578 579 #undef __FUNC__ 580 #define __FUNC__ "MatMultAdd_MPIAIJ" 581 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 582 { 583 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 584 int ierr; 585 586 PetscFunctionBegin; 587 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 588 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 589 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 590 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 591 PetscFunctionReturn(0); 592 } 593 594 #undef __FUNC__ 595 #define __FUNC__ "MatMultTrans_MPIAIJ" 596 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 597 { 598 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 599 int ierr; 600 601 PetscFunctionBegin; 602 /* do nondiagonal part */ 603 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 604 /* send it on its way */ 605 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 606 /* do local part */ 607 ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 608 /* receive remote parts: note this assumes the values are not actually */ 609 /* inserted in yy until the next line, which is true for my implementation*/ 610 /* but is not perhaps always true. 
*/
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* z = y + A'*x, same reverse-scatter pattern as MatMultTrans. */
#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIAIJ"
int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Scale every entry of the matrix by *aa (both local blocks). */
#undef __FUNC__
#define __FUNC__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Destroy the parallel matrix: release mappings, maps, the stash, the
   two sequential blocks, the column map, the scatter context and all
   auxiliary arrays, then the header itself.
*/
#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  PetscFunctionBegin;
  /* reference counted: only the last release really destroys */
  if (--mat->refct > 0) PetscFunctionReturn(0);

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(USE_PETSC_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
#if defined (USE_CTABLE)
  if (aij->colmap) TableDelete(aij->colmap);
#else
  if (aij->colmap) PetscFree(aij->colmap);
#endif
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

/* Binary save; only implemented for the single-processor case. */
#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_Binary"
int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

/*
   View for ASCII, Draw and Socket viewers.  Info formats print local
   statistics per processor; otherwise the whole matrix is gathered onto
   processor 0 (non-scalable) and viewed there.
*/
#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket"
int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ  *C = (Mat_SeqAIJ*)aij->A->data;
  int         ierr, format,shift = C->indexshift,rank = aij->rank;
  int         size = aij->size;
  FILE        *fd;
  ViewerType  vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      int     flg;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
      /* one processor prints at a time */
      PetscSequentialPhaseBegin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
        rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
        rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
    Scalar      *a;

    /* all rows/columns of the gathered matrix live on processor 0 */
    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part: temporarily shift local column numbers to
       global, then restore them afterwards */
    Aloc = (Mat_SeqAIJ*) aij->A->data;
    m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

    /* copy over the B part: translate local columns to global through garray */
    Aloc = (Mat_SeqAIJ*) aij->B->data;
    m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
    for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    PetscFree(ct);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
    }
    ierr = MatDestroy(A); CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Dispatch viewing to the handler for the viewer's type. */
#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,Viewer viewer)
{
  int        ierr;
  ViewerType vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
      PetscTypeCompare(vtype,SOCKET_VIEWER)) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr);
  } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
    ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ(1,1,"Viewer type not supported by PETSc object");
  }
  PetscFunctionReturn(0);
}

/*
    This has to provide several versions.

     2) a) use only local smoothing updating outer values only once.
        b) local smoothing updating outer values each inner iteration
     3) color updating out values between colors.
*/
/* Local SOR/relaxation sweeps; off-processor values of xx are brought
   into mat->lvec and treated as fixed during each local sweep. */
#undef __FUNC__
#define __FUNC__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                    double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     *b,*x,*xs,*ls,d,*v,sum;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;

  PetscFunctionBegin;
  ierr = VecGetArray(xx,&x); CHKERRQ(ierr);
  ierr = VecGetArray(bb,&b); CHKERRQ(ierr);
  ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* zero guess: delegate entirely to the sequential diagonal block */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
      ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr =
VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 882 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 883 while (its--) { 884 /* go down through the rows */ 885 for ( i=0; i<m; i++ ) { 886 n = A->i[i+1] - A->i[i]; 887 PLogFlops(4*n+3); 888 idx = A->j + A->i[i] + shift; 889 v = A->a + A->i[i] + shift; 890 sum = b[i]; 891 SPARSEDENSEMDOT(sum,xs,v,idx,n); 892 d = fshift + A->a[diag[i]+shift]; 893 n = B->i[i+1] - B->i[i]; 894 idx = B->j + B->i[i] + shift; 895 v = B->a + B->i[i] + shift; 896 SPARSEDENSEMDOT(sum,ls,v,idx,n); 897 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 898 } 899 /* come up through the rows */ 900 for ( i=m-1; i>-1; i-- ) { 901 n = A->i[i+1] - A->i[i]; 902 PLogFlops(4*n+3) 903 idx = A->j + A->i[i] + shift; 904 v = A->a + A->i[i] + shift; 905 sum = b[i]; 906 SPARSEDENSEMDOT(sum,xs,v,idx,n); 907 d = fshift + A->a[diag[i]+shift]; 908 n = B->i[i+1] - B->i[i]; 909 idx = B->j + B->i[i] + shift; 910 v = B->a + B->i[i] + shift; 911 SPARSEDENSEMDOT(sum,ls,v,idx,n); 912 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 913 } 914 } 915 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 916 if (flag & SOR_ZERO_INITIAL_GUESS) { 917 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 918 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 919 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 920 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 921 PetscFunctionReturn(0); 922 } 923 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 924 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 925 while (its--) { 926 for ( i=0; i<m; i++ ) { 927 n = A->i[i+1] - A->i[i]; 928 PLogFlops(4*n+3); 929 idx = A->j + A->i[i] + shift; 930 v = A->a + A->i[i] + shift; 931 sum = b[i]; 932 SPARSEDENSEMDOT(sum,xs,v,idx,n); 933 d = fshift + A->a[diag[i]+shift]; 934 n = B->i[i+1] - B->i[i]; 935 idx = B->j + B->i[i] + shift; 936 v = B->a + B->i[i] + shift; 937 SPARSEDENSEMDOT(sum,ls,v,idx,n); 938 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 939 } 940 } 941 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 942 if (flag & SOR_ZERO_INITIAL_GUESS) { 943 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 944 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 945 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 946 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 947 PetscFunctionReturn(0); 948 } 949 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 950 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 951 while (its--) { 952 for ( i=m-1; i>-1; i-- ) { 953 n = A->i[i+1] - A->i[i]; 954 PLogFlops(4*n+3); 955 idx = A->j + A->i[i] + shift; 956 v = A->a + A->i[i] + shift; 957 sum = b[i]; 958 SPARSEDENSEMDOT(sum,xs,v,idx,n); 959 d = fshift + A->a[diag[i]+shift]; 960 n = B->i[i+1] - B->i[i]; 961 idx = B->j + B->i[i] + shift; 962 v = B->a + B->i[i] + shift; 963 SPARSEDENSEMDOT(sum,ls,v,idx,n); 964 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 965 } 966 } 967 } else { 968 SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 969 } 970 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 971 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 972 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 973 PetscFunctionReturn(0); 974 } 975 976 #undef __FUNC__ 977 #define __FUNC__ "MatGetInfo_MPIAIJ" 978 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 979 { 980 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 981 Mat A = mat->A, B = mat->B; 982 int ierr; 983 double isend[5], irecv[5]; 984 985 PetscFunctionBegin; 986 info->block_size = 1.0; 987 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 988 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 989 isend[3] = info->memory; isend[4] = info->mallocs; 990 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 991 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 992 isend[3] += info->memory; isend[4] += info->mallocs; 993 if (flag == MAT_LOCAL) { 994 info->nz_used = isend[0]; 995 info->nz_allocated = isend[1]; 996 info->nz_unneeded = isend[2]; 997 info->memory = isend[3]; 998 info->mallocs = isend[4]; 999 } else if (flag == MAT_GLOBAL_MAX) { 1000 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1001 info->nz_used = irecv[0]; 1002 info->nz_allocated = irecv[1]; 1003 info->nz_unneeded = irecv[2]; 1004 info->memory = irecv[3]; 1005 info->mallocs = irecv[4]; 1006 } else if (flag == MAT_GLOBAL_SUM) { 1007 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1008 info->nz_used = irecv[0]; 1009 info->nz_allocated = irecv[1]; 1010 info->nz_unneeded = irecv[2]; 1011 info->memory = irecv[3]; 1012 info->mallocs = irecv[4]; 1013 } 1014 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1015 info->fill_ratio_needed = 0; 1016 info->factor_mallocs = 0; 1017 
info->rows_global = (double)mat->M; 1018 info->columns_global = (double)mat->N; 1019 info->rows_local = (double)mat->m; 1020 info->columns_local = (double)mat->N; 1021 1022 PetscFunctionReturn(0); 1023 } 1024 1025 #undef __FUNC__ 1026 #define __FUNC__ "MatSetOption_MPIAIJ" 1027 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1028 { 1029 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1030 int ierr; 1031 1032 PetscFunctionBegin; 1033 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1034 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1035 op == MAT_COLUMNS_UNSORTED || 1036 op == MAT_COLUMNS_SORTED || 1037 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1038 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1039 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1040 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1041 } else if (op == MAT_ROW_ORIENTED) { 1042 a->roworiented = 1; 1043 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1044 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1045 } else if (op == MAT_ROWS_SORTED || 1046 op == MAT_ROWS_UNSORTED || 1047 op == MAT_SYMMETRIC || 1048 op == MAT_STRUCTURALLY_SYMMETRIC || 1049 op == MAT_YES_NEW_DIAGONALS) 1050 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1051 else if (op == MAT_COLUMN_ORIENTED) { 1052 a->roworiented = 0; 1053 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1054 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1055 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1056 a->donotstash = 1; 1057 } else if (op == MAT_NO_NEW_DIAGONALS){ 1058 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1059 } else { 1060 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1061 } 1062 PetscFunctionReturn(0); 1063 } 1064 1065 #undef __FUNC__ 1066 #define __FUNC__ "MatGetSize_MPIAIJ" 1067 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1068 { 1069 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1070 1071 PetscFunctionBegin; 1072 if (m) *m = mat->M; 1073 if (n) *n = mat->N; 1074 PetscFunctionReturn(0); 1075 } 1076 1077 #undef __FUNC__ 1078 #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1079 int 
MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1080 { 1081 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1082 1083 PetscFunctionBegin; 1084 if (m) *m = mat->m; 1085 if (n) *n = mat->n; 1086 PetscFunctionReturn(0); 1087 } 1088 1089 #undef __FUNC__ 1090 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1091 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1092 { 1093 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1094 1095 PetscFunctionBegin; 1096 *m = mat->rstart; *n = mat->rend; 1097 PetscFunctionReturn(0); 1098 } 1099 1100 #undef __FUNC__ 1101 #define __FUNC__ "MatGetRow_MPIAIJ" 1102 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1103 { 1104 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1105 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1106 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1107 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1108 int *cmap, *idx_p; 1109 1110 PetscFunctionBegin; 1111 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1112 mat->getrowactive = PETSC_TRUE; 1113 1114 if (!mat->rowvalues && (idx || v)) { 1115 /* 1116 allocate enough space to hold information from the longest row. 
1117 */ 1118 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1119 int max = 1,m = mat->m,tmp; 1120 for ( i=0; i<m; i++ ) { 1121 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1122 if (max < tmp) { max = tmp; } 1123 } 1124 mat->rowvalues = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1125 mat->rowindices = (int *) (mat->rowvalues + max); 1126 } 1127 1128 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows") 1129 lrow = row - rstart; 1130 1131 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1132 if (!v) {pvA = 0; pvB = 0;} 1133 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1134 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1135 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1136 nztot = nzA + nzB; 1137 1138 cmap = mat->garray; 1139 if (v || idx) { 1140 if (nztot) { 1141 /* Sort by increasing column numbers, assuming A and B already sorted */ 1142 int imark = -1; 1143 if (v) { 1144 *v = v_p = mat->rowvalues; 1145 for ( i=0; i<nzB; i++ ) { 1146 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1147 else break; 1148 } 1149 imark = i; 1150 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1151 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1152 } 1153 if (idx) { 1154 *idx = idx_p = mat->rowindices; 1155 if (imark > -1) { 1156 for ( i=0; i<imark; i++ ) { 1157 idx_p[i] = cmap[cworkB[i]]; 1158 } 1159 } else { 1160 for ( i=0; i<nzB; i++ ) { 1161 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1162 else break; 1163 } 1164 imark = i; 1165 } 1166 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1167 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1168 } 1169 } else { 1170 if (idx) *idx = 0; 1171 if (v) *v = 0; 1172 } 1173 } 1174 *nz = nztot; 1175 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1176 ierr = 
(*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1177 PetscFunctionReturn(0); 1178 } 1179 1180 #undef __FUNC__ 1181 #define __FUNC__ "MatRestoreRow_MPIAIJ" 1182 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1183 { 1184 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1185 1186 PetscFunctionBegin; 1187 if (aij->getrowactive == PETSC_FALSE) { 1188 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1189 } 1190 aij->getrowactive = PETSC_FALSE; 1191 PetscFunctionReturn(0); 1192 } 1193 1194 #undef __FUNC__ 1195 #define __FUNC__ "MatNorm_MPIAIJ" 1196 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1197 { 1198 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1199 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1200 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1201 double sum = 0.0; 1202 Scalar *v; 1203 1204 PetscFunctionBegin; 1205 if (aij->size == 1) { 1206 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1207 } else { 1208 if (type == NORM_FROBENIUS) { 1209 v = amat->a; 1210 for (i=0; i<amat->nz; i++ ) { 1211 #if defined(USE_PETSC_COMPLEX) 1212 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1213 #else 1214 sum += (*v)*(*v); v++; 1215 #endif 1216 } 1217 v = bmat->a; 1218 for (i=0; i<bmat->nz; i++ ) { 1219 #if defined(USE_PETSC_COMPLEX) 1220 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1221 #else 1222 sum += (*v)*(*v); v++; 1223 #endif 1224 } 1225 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1226 *norm = sqrt(*norm); 1227 } else if (type == NORM_1) { /* max column norm */ 1228 double *tmp, *tmp2; 1229 int *jj, *garray = aij->garray; 1230 tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp); 1231 tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2); 1232 PetscMemzero(tmp,aij->N*sizeof(double)); 1233 *norm = 0.0; 1234 v = amat->a; jj = amat->j; 1235 for ( j=0; j<amat->nz; j++ ) { 1236 tmp[cstart + *jj++ + 
shift] += PetscAbsScalar(*v); v++; 1237 } 1238 v = bmat->a; jj = bmat->j; 1239 for ( j=0; j<bmat->nz; j++ ) { 1240 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1241 } 1242 ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1243 for ( j=0; j<aij->N; j++ ) { 1244 if (tmp2[j] > *norm) *norm = tmp2[j]; 1245 } 1246 PetscFree(tmp); PetscFree(tmp2); 1247 } else if (type == NORM_INFINITY) { /* max row norm */ 1248 double ntemp = 0.0; 1249 for ( j=0; j<amat->m; j++ ) { 1250 v = amat->a + amat->i[j] + shift; 1251 sum = 0.0; 1252 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1253 sum += PetscAbsScalar(*v); v++; 1254 } 1255 v = bmat->a + bmat->i[j] + shift; 1256 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1257 sum += PetscAbsScalar(*v); v++; 1258 } 1259 if (sum > ntemp) ntemp = sum; 1260 } 1261 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1262 } else { 1263 SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 1264 } 1265 } 1266 PetscFunctionReturn(0); 1267 } 1268 1269 #undef __FUNC__ 1270 #define __FUNC__ "MatTranspose_MPIAIJ" 1271 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1272 { 1273 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1274 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1275 int ierr,shift = Aloc->indexshift; 1276 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1277 Mat B; 1278 Scalar *array; 1279 1280 PetscFunctionBegin; 1281 if (matout == PETSC_NULL && M != N) { 1282 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1283 } 1284 1285 ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1286 1287 /* copy over the A part */ 1288 Aloc = (Mat_SeqAIJ*) a->A->data; 1289 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1290 row = a->rstart; 1291 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1292 for ( i=0; i<m; i++ ) { 1293 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1294 row++; array += 
ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1295 } 1296 aj = Aloc->j; 1297 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1298 1299 /* copy over the B part */ 1300 Aloc = (Mat_SeqAIJ*) a->B->data; 1301 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1302 row = a->rstart; 1303 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1304 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1305 for ( i=0; i<m; i++ ) { 1306 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1307 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1308 } 1309 PetscFree(ct); 1310 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1311 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1312 if (matout != PETSC_NULL) { 1313 *matout = B; 1314 } else { 1315 PetscOps *Abops; 1316 struct _MatOps *Aops; 1317 1318 /* This isn't really an in-place transpose .... but free data structures from a */ 1319 PetscFree(a->rowners); 1320 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1321 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1322 #if defined (USE_CTABLE) 1323 if (a->colmap) TableDelete(a->colmap); 1324 #else 1325 if (a->colmap) PetscFree(a->colmap); 1326 #endif 1327 if (a->garray) PetscFree(a->garray); 1328 if (a->lvec) VecDestroy(a->lvec); 1329 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1330 PetscFree(a); 1331 1332 /* 1333 This is horrible, horrible code. We need to keep the 1334 A pointers for the bops and ops but copy everything 1335 else from C. 
1336 */ 1337 Abops = A->bops; 1338 Aops = A->ops; 1339 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1340 A->bops = Abops; 1341 A->ops = Aops; 1342 PetscHeaderDestroy(B); 1343 } 1344 PetscFunctionReturn(0); 1345 } 1346 1347 #undef __FUNC__ 1348 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1349 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1350 { 1351 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1352 Mat a = aij->A, b = aij->B; 1353 int ierr,s1,s2,s3; 1354 1355 PetscFunctionBegin; 1356 ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 1357 if (rr) { 1358 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1359 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1360 /* Overlap communication with computation. */ 1361 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1362 } 1363 if (ll) { 1364 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1365 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1366 ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr); 1367 } 1368 /* scale the diagonal block */ 1369 ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr); 1370 1371 if (rr) { 1372 /* Do a scatter end and then right scale the off-diagonal block */ 1373 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1374 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 1375 } 1376 1377 PetscFunctionReturn(0); 1378 } 1379 1380 1381 #undef __FUNC__ 1382 #define __FUNC__ "MatPrintHelp_MPIAIJ" 1383 int MatPrintHelp_MPIAIJ(Mat A) 1384 { 1385 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1386 int ierr; 1387 1388 PetscFunctionBegin; 1389 if (!a->rank) { 1390 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1391 } 1392 PetscFunctionReturn(0); 1393 } 1394 1395 #undef __FUNC__ 1396 #define __FUNC__ "MatGetBlockSize_MPIAIJ" 1397 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1398 { 1399 PetscFunctionBegin; 1400 *bs = 1; 1401 PetscFunctionReturn(0); 1402 } 
1403 #undef __FUNC__ 1404 #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1405 int MatSetUnfactored_MPIAIJ(Mat A) 1406 { 1407 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1408 int ierr; 1409 1410 PetscFunctionBegin; 1411 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1412 PetscFunctionReturn(0); 1413 } 1414 1415 #undef __FUNC__ 1416 #define __FUNC__ "MatEqual_MPIAIJ" 1417 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1418 { 1419 Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1420 Mat a, b, c, d; 1421 PetscTruth flg; 1422 int ierr; 1423 1424 PetscFunctionBegin; 1425 if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1426 a = matA->A; b = matA->B; 1427 c = matB->A; d = matB->B; 1428 1429 ierr = MatEqual(a, c, &flg); CHKERRQ(ierr); 1430 if (flg == PETSC_TRUE) { 1431 ierr = MatEqual(b, d, &flg); CHKERRQ(ierr); 1432 } 1433 ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 1434 PetscFunctionReturn(0); 1435 } 1436 1437 #undef __FUNC__ 1438 #define __FUNC__ "MatCopy_MPIAIJ" 1439 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1440 { 1441 int ierr; 1442 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1443 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1444 1445 PetscFunctionBegin; 1446 if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) { 1447 /* because of the column compression in the off-processor part of the matrix a->B, 1448 the number of columns in a->B and b->B may be different, hence we cannot call 1449 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1450 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1451 then copying the submatrices */ 1452 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1453 } else { 1454 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1455 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1456 } 1457 PetscFunctionReturn(0); 1458 } 1459 1460 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1461 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1462 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1463 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **); 1464 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *); 1465 1466 /* -------------------------------------------------------------------*/ 1467 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1468 MatGetRow_MPIAIJ, 1469 MatRestoreRow_MPIAIJ, 1470 MatMult_MPIAIJ, 1471 MatMultAdd_MPIAIJ, 1472 MatMultTrans_MPIAIJ, 1473 MatMultTransAdd_MPIAIJ, 1474 0, 1475 0, 1476 0, 1477 0, 1478 0, 1479 0, 1480 MatRelax_MPIAIJ, 1481 MatTranspose_MPIAIJ, 1482 MatGetInfo_MPIAIJ, 1483 MatEqual_MPIAIJ, 1484 MatGetDiagonal_MPIAIJ, 1485 MatDiagonalScale_MPIAIJ, 1486 MatNorm_MPIAIJ, 1487 MatAssemblyBegin_MPIAIJ, 1488 MatAssemblyEnd_MPIAIJ, 1489 0, 1490 MatSetOption_MPIAIJ, 1491 MatZeroEntries_MPIAIJ, 1492 MatZeroRows_MPIAIJ, 1493 0, 1494 0, 1495 0, 1496 0, 1497 MatGetSize_MPIAIJ, 1498 MatGetLocalSize_MPIAIJ, 1499 MatGetOwnershipRange_MPIAIJ, 1500 0, 1501 0, 1502 0, 1503 0, 1504 MatDuplicate_MPIAIJ, 1505 0, 1506 0, 1507 0, 1508 0, 1509 0, 1510 MatGetSubMatrices_MPIAIJ, 1511 MatIncreaseOverlap_MPIAIJ, 1512 MatGetValues_MPIAIJ, 1513 MatCopy_MPIAIJ, 1514 MatPrintHelp_MPIAIJ, 1515 MatScale_MPIAIJ, 1516 0, 1517 0, 1518 0, 1519 MatGetBlockSize_MPIAIJ, 1520 0, 1521 0, 1522 0, 1523 0, 1524 MatFDColoringCreate_MPIAIJ, 1525 0, 1526 MatSetUnfactored_MPIAIJ, 1527 0, 1528 0, 1529 MatGetSubMatrix_MPIAIJ, 1530 0, 1531 0, 1532 
MatGetMaps_Petsc}; 1533 1534 /* ----------------------------------------------------------------------------------------*/ 1535 1536 EXTERN_C_BEGIN 1537 #undef __FUNC__ 1538 #define __FUNC__ "MatStoreValues_MPIAIJ" 1539 int MatStoreValues_MPIAIJ(Mat mat) 1540 { 1541 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1542 int ierr; 1543 1544 PetscFunctionBegin; 1545 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1546 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1547 PetscFunctionReturn(0); 1548 } 1549 EXTERN_C_END 1550 1551 EXTERN_C_BEGIN 1552 #undef __FUNC__ 1553 #define __FUNC__ "MatRetrieveValues_MPIAIJ" 1554 int MatRetrieveValues_MPIAIJ(Mat mat) 1555 { 1556 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1557 int ierr; 1558 1559 PetscFunctionBegin; 1560 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1561 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1562 PetscFunctionReturn(0); 1563 } 1564 EXTERN_C_END 1565 1566 #include "pc.h" 1567 EXTERN_C_BEGIN 1568 extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1569 EXTERN_C_END 1570 1571 #undef __FUNC__ 1572 #define __FUNC__ "MatCreateMPIAIJ" 1573 /*@C 1574 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1575 (the default parallel PETSc format). For good matrix assembly performance 1576 the user should preallocate the matrix storage by setting the parameters 1577 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1578 performance can be increased by more than a factor of 50. 1579 1580 Collective on MPI_Comm 1581 1582 Input Parameters: 1583 + comm - MPI communicator 1584 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1585 This value should be the same as the local size used in creating the 1586 y vector for the matrix-vector product y = Ax. 1587 . n - This value should be the same as the local size used in creating the 1588 x vector for the matrix-vector product y = Ax. 
(or PETSC_DECIDE to have 1589 calculated if N is given) For square matrices n is almost always m. 1590 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1591 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1592 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 1593 (same value is used for all local rows) 1594 . d_nnz - array containing the number of nonzeros in the various rows of the 1595 DIAGONAL portion of the local submatrix (possibly different for each row) 1596 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 1597 The size of this array is equal to the number of local rows, i.e 'm'. 1598 You must leave room for the diagonal entry even if it is zero. 1599 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 1600 submatrix (same value is used for all local rows). 1601 - o_nnz - array containing the number of nonzeros in the various rows of the 1602 OFF-DIAGONAL portion of the local submatrix (possibly different for 1603 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 1604 structure. The size of this array is equal to the number 1605 of local rows, i.e 'm'. 1606 1607 Output Parameter: 1608 . A - the matrix 1609 1610 Notes: 1611 m,n,M,N parameters specify the size of the matrix, and its partitioning across 1612 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 1613 storage requirements for this matrix. 1614 1615 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 1616 processor than it must be used on all processors that share the object for 1617 that argument. 1618 1619 The AIJ format (also called the Yale sparse matrix format or 1620 compressed row storage), is fully compatible with standard Fortran 77 1621 storage. That is, the stored row and column indices can begin at 1622 either one (as in Fortran) or zero. See the users manual for details. 
1623 1624 The user MUST specify either the local or global matrix dimensions 1625 (possibly both). 1626 1627 The parallel matrix is partitioned such that the first m0 rows belong to 1628 process 0, the next m1 rows belong to process 1, the next m2 rows belong 1629 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 1630 1631 The DIAGONAL portion of the local submatrix of a processor can be defined 1632 as the submatrix which is obtained by extraction the part corresponding 1633 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 1634 first row that belongs to the processor, and r2 is the last row belonging 1635 to the this processor. This is a square mxm matrix. The remaining portion 1636 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 1637 1638 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 1639 1640 By default, this format uses inodes (identical nodes) when possible. 1641 We search for consecutive rows with the same nonzero structure, thereby 1642 reusing matrix information to achieve increased efficiency. 1643 1644 Options Database Keys: 1645 + -mat_aij_no_inode - Do not use inodes 1646 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 1647 - -mat_aij_oneindex - Internally use indexing starting at 1 1648 rather than 0. Note that when calling MatSetValues(), 1649 the user still MUST index entries starting at 0! 1650 . -mat_mpi - use the parallel matrix data structures even on one processor 1651 (defaults to using SeqBAIJ format on one processor) 1652 . -mat_mpi - use the parallel matrix data structures even on one processor 1653 (defaults to using SeqAIJ format on one processor) 1654 1655 1656 Example usage: 1657 1658 Consider the following 8x8 matrix with 34 non-zero values, that is 1659 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 1660 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 1661 as follows: 1662 1663 .vb 1664 1 2 0 | 0 3 0 | 0 4 1665 Proc0 0 5 6 | 7 0 0 | 8 0 1666 9 0 10 | 11 0 0 | 12 0 1667 ------------------------------------- 1668 13 0 14 | 15 16 17 | 0 0 1669 Proc1 0 18 0 | 19 20 21 | 0 0 1670 0 0 0 | 22 23 0 | 24 0 1671 ------------------------------------- 1672 Proc2 25 26 27 | 0 0 28 | 29 0 1673 30 0 0 | 31 32 33 | 0 34 1674 .ve 1675 1676 This can be represented as a collection of submatrices as: 1677 1678 .vb 1679 A B C 1680 D E F 1681 G H I 1682 .ve 1683 1684 Where the submatrices A,B,C are owned by proc0, D,E,F are 1685 owned by proc1, G,H,I are owned by proc2. 1686 1687 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 1688 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 1689 The 'M','N' parameters are 8,8, and have the same values on all procs. 1690 1691 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 1692 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 1693 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 1694 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 1695 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 1696 matrix, ans [DF] as another SeqAIJ matrix. 1697 1698 When d_nz, o_nz parameters are specified, d_nz storage elements are 1699 allocated for every row of the local diagonal submatrix, and o_nz 1700 storage locations are allocated for every row of the OFF-DIAGONAL submat. 1701 One way to choose d_nz and o_nz is to use the max nonzerors per local 1702 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 1703 In this case, the values of d_nz,o_nz are: 1704 .vb 1705 proc0 : dnz = 2, o_nz = 2 1706 proc1 : dnz = 3, o_nz = 2 1707 proc2 : dnz = 1, o_nz = 4 1708 .ve 1709 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 1710 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 1711 for proc3. 
   i.e we are using 12+15+10=37 storage locations to store 34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr, i,size,flag1 = 0, flag2 = 0;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1); CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr);
  /* On a single process (unless the MPI format is forced via -mat_mpiaij or
     -mat_mpi) fall back to a plain sequential AIJ matrix; note o_nz/o_nnz are
     ignored on this path since there is no off-diagonal block. */
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data         = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
  B->ops->destroy = MatDestroy_MPIAIJ;
  B->ops->view    = MatView_MPIAIJ;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  MPI_Comm_rank(comm,&b->rank);

  /* Per-row preallocation arrays only make sense once the local row count is
     known, so PETSC_DECIDE rows together with d_nnz/o_nnz is rejected. */
  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }

  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships: one allocation holds both
     rowners (first size+2 entries) and cowners (second size+2 entries).  After
     the Allgather of local sizes into slots 1..size, the in-place prefix sum
     turns the table into ownership offsets, so rows rowners[p]..rowners[p+1]-1
     belong to process p. */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* The local storage is two sequential matrices: b->A is the m x n "diagonal"
     block (columns this process owns) and b->B is the m x N "off-diagonal"
     block (all other columns; compacted later via garray/colmap). */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off-processor entries set with MatSetValues() */
  ierr = StashCreate_Private(comm,1,&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix-vector multiply; created lazily in MatSetUpMultiply_MPIAIJ() */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
                                    "MatStoreValues_MPIAIJ",
                                    (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
                                    "MatRetrieveValues_MPIAIJ",
                                    (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
                                    "MatGetDiagonalBlock_MPIAIJ",
                                    (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  *A = B;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIAIJ"
/*
   Deep-copies a parallel AIJ matrix: header/ops, sizes, ownership tables,
   colmap/garray, the stash, the local work vector and scatter context, and
   the sequential diagonal (A) and off-diagonal (B) blocks.  cpvalues is
   passed straight through to MatDuplicate() on the two sequential blocks,
   so it alone decides whether numerical values are copied.
   NOTE(review): the result is unconditionally marked assembled — presumably
   callers only duplicate assembled matrices; confirm before relying on it.
*/
int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
  mat->ops->destroy = MatDestroy_MPIAIJ;
  mat->ops->view    = MatView_MPIAIJ;
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  /* rowners and cowners share one allocation (same layout as in MatCreateMPIAIJ),
     so a single memcpy of 2*(size+2) ints copies both ownership tables */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashCreate_Private(matin->comm,1,&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (USE_CTABLE)
    ierr = TableCreateCopy(oldmat->colmap,&a->colmap); CHKERRQ(ierr);
#else
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    /* garray length equals the (compacted) column count of the off-diagonal block */
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIAIJ"
/*
   Loads an MPIAIJ matrix from a PETSc binary viewer.

   Process 0 reads the header (cookie, M, N, nz), the per-row lengths, the
   column indices, and the values from the file descriptor, and ships each
   other process its share with MPI_Scatterv/MPI_Send; the other processes
   only receive.  Rows are split as evenly as possible: M/size per process,
   with the first M%size processes getting one extra row.
*/
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        i, nz, ierr, j,rstart, rend, fd;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag,cend,cstart,n;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows; rowners becomes a prefix-sum offset
     table (same construction as in MatCreateMPIAIJ), which also serves as
     the displacement array for the MPI_Scatterv below */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors; ourlens and offlens share one
     allocation of 2*(rend-rstart) ints, first half total row lengths, second
     half (filled later) off-diagonal counts */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    /* non-root ranks: send buffer arguments are ignored by MPI_Scatterv */
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it (reused for every destination) */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix: temporarily subtract offlens so ourlens holds the
     diagonal-block counts MatCreateMPIAIJ expects, then restore the totals
     (needed again when inserting values row by row) */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  /* file stores each row's columns in ascending order -- TODO confirm against writer */
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED); CHKERRQ(ierr);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' values and ship out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values; nz still holds this process's nonzero count */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIAIJ"
/*
    Extracts a parallel submatrix (rows isrow, columns iscol) as a new MPIAIJ
  matrix with csize local columns (PETSC_DECIDE splits them evenly).

    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().

    With MAT_REUSE_MATRIX the sequential submatrix cached on the previous
  call (composed on the object under "SubMatrix") is refilled in place.
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  Mat        *local,M, Mreuse;
  Scalar     *vwork,*aa;
  MPI_Comm   comm = mat->comm;
  Mat_SeqAIJ *aij;
  int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;

  PetscFunctionBegin;
  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    PetscFree(local);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *) (Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths: dlens and olens share one allocation */
    dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
    olens = dlens + m;
    for ( i=0; i<m; i++ ) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for ( j=0; j<jend; j++ ) {
        if ( *jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    PetscFree(dlens);
  } else {
    int ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ *) (Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  /* copy the sequential submatrix row by row into the parallel matrix,
     walking the CSR arrays with moving cursors jj/aa */
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request; the compose takes a
     reference, so the extra one from creation is dropped here */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}