/*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
EXTERN int DisAssemble_MPIAIJ(Mat);
EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatPrintHelp_SeqAIJ(Mat);

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable
  at a slightly higher hash-table cost; without it, it is not scalable
  (each processor holds an order-N integer array) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if
(!a->singlemalloc) { \ 94 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 95 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 96 } \ 97 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 98 a->singlemalloc = PETSC_TRUE; \ 99 \ 100 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 101 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 102 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 103 a->maxnz += CHUNKSIZE; \ 104 a->reallocs++; \ 105 } \ 106 N = nrow++ - 1; a->nz++; \ 107 /* shift up all the later entries in this row */ \ 108 for (ii=N; ii>=_i; ii--) { \ 109 rp[ii+1] = rp[ii]; \ 110 ap[ii+1] = ap[ii]; \ 111 } \ 112 rp[_i] = col1; \ 113 ap[_i] = value; \ 114 a_noinsert: ; \ 115 ailen[row] = nrow; \ 116 } 117 118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 119 { \ 120 \ 121 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 122 rmax = bimax[row]; nrow = bilen[row]; \ 123 col1 = col - shift; \ 124 \ 125 low = 0; high = nrow; \ 126 while (high-low > 5) { \ 127 t = (low+high)/2; \ 128 if (rp[t] > col) high = t; \ 129 else low = t; \ 130 } \ 131 for (_i=low; _i<high; _i++) { \ 132 if (rp[_i] > col1) break; \ 133 if (rp[_i] == col1) { \ 134 if (addv == ADD_VALUES) ap[_i] += value; \ 135 else ap[_i] = value; \ 136 goto b_noinsert; \ 137 } \ 138 } \ 139 if (nonew == 1) goto b_noinsert; \ 140 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 141 if (nrow >= rmax) { \ 142 /* there is no extra room in row, therefore enlarge */ \ 143 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 144 PetscScalar *new_a; \ 145 \ 146 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 147 \ 148 /* malloc new storage space */ \ 149 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 150 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 151 new_j = (int*)(new_a + new_nz); \ 152 new_i = new_j + new_nz; \ 153 \ 154 /* copy over old data into new slots */ \ 155 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 156 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 157 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 158 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 159 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 160 len*sizeof(int));CHKERRQ(ierr); \ 161 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 163 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 164 /* free up old matrix storage */ \ 165 \ 166 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 167 if (!b->singlemalloc) { \ 168 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 169 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 170 } \ 171 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 172 b->singlemalloc = PETSC_TRUE; \ 173 \ 174 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 175 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 176 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 177 b->maxnz += CHUNKSIZE; \ 178 b->reallocs++; \ 179 } \ 180 N = nrow++ - 1; b->nz++; \ 181 /* shift up all the later entries in this row */ \ 182 for (ii=N; ii>=_i; ii--) { \ 183 rp[ii+1] = rp[ii]; \ 184 ap[ii+1] = ap[ii]; \ 185 } \ 186 rp[_i] = col1; \ 187 ap[_i] = value; \ 188 b_noinsert: ; \ 189 bilen[row] = nrow; \ 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatSetValues_MPIAIJ" 194 int MatSetValues_MPIAIJ(Mat 
mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 195 { 196 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 197 PetscScalar value; 198 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 199 int cstart = aij->cstart,cend = aij->cend,row,col; 200 PetscTruth roworiented = aij->roworiented; 201 202 /* Some Variables required in the macro */ 203 Mat A = aij->A; 204 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 205 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 206 PetscScalar *aa = a->a; 207 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 208 Mat B = aij->B; 209 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 210 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 211 PetscScalar *ba = b->a; 212 213 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 214 int nonew = a->nonew,shift = a->indexshift; 215 PetscScalar *ap; 216 217 PetscFunctionBegin; 218 for (i=0; i<m; i++) { 219 if (im[i] < 0) continue; 220 #if defined(PETSC_USE_BOPT_g) 221 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 222 #endif 223 if (im[i] >= rstart && im[i] < rend) { 224 row = im[i] - rstart; 225 for (j=0; j<n; j++) { 226 if (in[j] >= cstart && in[j] < cend){ 227 col = in[j] - cstart; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 if (ignorezeroentries && value == 0.0) continue; 230 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } else if (in[j] < 0) continue; 233 #if defined(PETSC_USE_BOPT_g) 234 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");} 235 #endif 236 else { 237 if (mat->was_assembled) { 238 if (!aij->colmap) { 239 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 240 } 241 #if defined (PETSC_USE_CTABLE) 242 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 243 col--; 244 #else 245 col = aij->colmap[in[j]] - 1; 246 #endif 247 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 248 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 249 col = in[j]; 250 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 251 B = aij->B; 252 b = (Mat_SeqAIJ*)B->data; 253 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 254 ba = b->a; 255 } 256 } else col = in[j]; 257 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 258 if (ignorezeroentries && value == 0.0) continue; 259 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 260 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 261 } 262 } 263 } else { 264 if (!aij->donotstash) { 265 if (roworiented) { 266 if (ignorezeroentries && v[i*n] == 0.0) continue; 267 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 268 } else { 269 if (ignorezeroentries && v[i] == 0.0) continue; 270 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 271 } 272 } 273 } 274 } 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatGetValues_MPIAIJ" 280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 281 { 282 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 283 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 284 int cstart = aij->cstart,cend = aij->cend,row,col; 285 286 PetscFunctionBegin; 287 for (i=0; i<m; i++) { 288 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 289 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 
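    /* Rows owned by this process are read from the local blocks: a column in
       [cstart,cend) lives in the diagonal block aij->A at offset col-cstart,
       any other column is looked up in the colmap to find its slot (if any)
       in the off-diagonal block aij->B; off-process rows are not supported. */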
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,nstash,reallocs;
  InsertMode addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;
#if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES)
  PetscTruth  flag;
#endif

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
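     (The MPI_Allreduce below takes the product of mat->was_assembled over all
      processes: with values {1,1,0} on three processes, say, the product is 0,
      so the two still-assembled processes call DisAssemble_MPIAIJ() as well and
      the subsequent reassembly is consistent everywhere.)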
*/ 385 /* 386 if nonzero structure of submatrix B cannot change then we know that 387 no processor disassembled thus we can skip this stuff 388 */ 389 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 390 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 391 if (mat->was_assembled && !other_disassembled) { 392 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 393 } 394 } 395 396 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 397 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 398 } 399 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 400 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 401 402 if (aij->rowvalues) { 403 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 404 aij->rowvalues = 0; 405 } 406 407 /* used by MatAXPY() */ 408 Mat_SeqAIJ *a = (Mat_SeqAIJ *)aij->A->data; 409 Mat_SeqAIJ *b = (Mat_SeqAIJ *)aij->B->data; 410 a->xtoy = 0; b->xtoy = 0; 411 a->XtoY = 0; b->XtoY = 0; 412 413 #if defined(PETSC_HAVE_SUPERLUDIST) 414 ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr); 415 if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); } 416 #endif 417 418 #if defined(PETSC_HAVE_SPOOLES) 419 ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr); 420 if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); } 421 #endif 422 PetscFunctionReturn(0); 423 } 424 425 #undef __FUNCT__ 426 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 427 int MatZeroEntries_MPIAIJ(Mat A) 428 { 429 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 430 int ierr; 431 432 PetscFunctionBegin; 433 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 434 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 435 PetscFunctionReturn(0); 436 } 437 438 #undef __FUNCT__ 439 #define __FUNCT__ "MatZeroRows_MPIAIJ" 440 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag) 441 { 442 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 443 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 444 int *nprocs,j,idx,nsends,row; 445 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 446 int *rvalues,tag = A->tag,count,base,slen,n,*source; 447 int *lens,imdex,*lrows,*values,rstart=l->rstart; 448 MPI_Comm comm = A->comm; 449 MPI_Request *send_waits,*recv_waits; 450 MPI_Status recv_status,*send_status; 451 IS istmp; 452 PetscTruth found; 453 454 PetscFunctionBegin; 455 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 456 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 457 458 /* first count number of contributors to each processor */ 459 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 460 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 461 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 462 for (i=0; i<N; i++) { 463 idx = rows[i]; 464 found = PETSC_FALSE; 465 for (j=0; j<size; j++) { 466 if (idx >= owners[j] && idx < owners[j+1]) { 467 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 468 } 469 } 470 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 471 } 472 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 473 474 /* inform other processors of number of messages and max length*/ 475 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 476 477 /* post receives: */ 478 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 479 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 480 for (i=0; i<nrecvs; i++) { 481 ierr = 
MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     allocating new memory, thus calling lots of mallocs and slowing things down.
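     When the local diagonal block is rectangular the entry (row,row) may fall
     outside the local column range [cstart,cend) and therefore belong to l->B,
     which is why that case goes through the generic MatSetValues() loop below
     (and why l->B must be zeroed before l->A).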
548 549 Contributed by: Mathew Knepley 550 */ 551 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 552 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 553 if (diag && (l->A->M == l->A->N)) { 554 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 555 } else if (diag) { 556 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 557 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 558 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 559 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 560 } 561 for (i = 0; i < slen; i++) { 562 row = lrows[i] + rstart; 563 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 564 } 565 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 566 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 567 } else { 568 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 569 } 570 ierr = ISDestroy(istmp);CHKERRQ(ierr); 571 ierr = PetscFree(lrows);CHKERRQ(ierr); 572 573 /* wait on sends */ 574 if (nsends) { 575 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 576 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 577 ierr = PetscFree(send_status);CHKERRQ(ierr); 578 } 579 ierr = PetscFree(send_waits);CHKERRQ(ierr); 580 ierr = PetscFree(svalues);CHKERRQ(ierr); 581 582 PetscFunctionReturn(0); 583 } 584 585 #undef __FUNCT__ 586 #define __FUNCT__ "MatMult_MPIAIJ" 587 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 588 { 589 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 590 int ierr,nt; 591 592 PetscFunctionBegin; 593 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 594 if (nt != A->n) { 595 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 596 } 597 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 598 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 599 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 600 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 601 PetscFunctionReturn(0); 602 } 603 604 #undef __FUNCT__ 605 #define __FUNCT__ "MatMultAdd_MPIAIJ" 606 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 607 { 608 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 609 int ierr; 610 611 PetscFunctionBegin; 612 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 613 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 614 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 615 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 616 PetscFunctionReturn(0); 617 } 618 619 #undef __FUNCT__ 620 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 621 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 622 { 623 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 624 int ierr; 625 626 PetscFunctionBegin; 627 /* do nondiagonal part */ 628 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 629 /* send it on its way */ 630 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 631 /* do local part */ 632 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 633 /* receive remote parts: note this assumes the values are not actually */ 634 /* inserted in yy until the next line, which is true for my implementation*/ 635 /* but is not perhaps always true. 
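   (the local multiply-transpose above has already overwritten yy, so a scatter
    implementation that added the remote contributions into yy already during
    VecScatterBegin() would lose them)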
*/ 636 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 637 PetscFunctionReturn(0); 638 } 639 640 #undef __FUNCT__ 641 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 642 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 643 { 644 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 645 int ierr; 646 647 PetscFunctionBegin; 648 /* do nondiagonal part */ 649 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 650 /* send it on its way */ 651 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 652 /* do local part */ 653 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 654 /* receive remote parts: note this assumes the values are not actually */ 655 /* inserted in yy until the next line, which is true for my implementation*/ 656 /* but is not perhaps always true. */ 657 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 658 PetscFunctionReturn(0); 659 } 660 661 /* 662 This only works correctly for square matrices where the subblock A->A is the 663 diagonal block 664 */ 665 #undef __FUNCT__ 666 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 667 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 668 { 669 int ierr; 670 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 671 672 PetscFunctionBegin; 673 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 674 if (a->rstart != a->cstart || a->rend != a->cend) { 675 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 676 } 677 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 678 PetscFunctionReturn(0); 679 } 680 681 #undef __FUNCT__ 682 #define __FUNCT__ "MatScale_MPIAIJ" 683 int MatScale_MPIAIJ(PetscScalar *aa,Mat A) 684 { 685 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 686 int ierr; 687 688 PetscFunctionBegin; 689 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 690 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 691 PetscFunctionReturn(0); 692 } 693 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatDestroy_MPIAIJ" 696 int MatDestroy_MPIAIJ(Mat mat) 697 { 698 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 699 int ierr; 700 701 PetscFunctionBegin; 702 #if defined(PETSC_USE_LOG) 703 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 704 #endif 705 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 706 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 707 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 708 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 709 #if defined (PETSC_USE_CTABLE) 710 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 711 #else 712 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 713 #endif 714 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 715 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 716 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 717 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 718 ierr = PetscFree(aij);CHKERRQ(ierr); 719 PetscFunctionReturn(0); 720 } 721 722 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer); 723 extern int MatFactorInfo_Spooles(Mat,PetscViewer); 724 725 #undef __FUNCT__ 726 #define __FUNCT__ "MatView_MPIAIJ_Binary" 727 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 728 { 729 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 730 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 731 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 732 int nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag; 733 int 
nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (rank == 0) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->m; i++) {
    /* off-diagonal entries whose global column lies below cstart */
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    /* diagonal-block entries: local column indices are shifted by cstart */
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    /* remaining off-diagonal entries, so each row comes out in global column order */
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
column_values[cnt++] = A->a[k]; 823 } 824 for (; j<B->i[i+1]; j++) { 825 column_values[cnt++] = B->a[j]; 826 } 827 } 828 if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 829 830 /* store the column values to the file */ 831 if (rank == 0) { 832 MPI_Status status; 833 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr); 834 for (i=1; i<size; i++) { 835 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 836 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 837 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 838 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr); 839 } 840 } else { 841 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 842 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 843 } 844 ierr = PetscFree(column_values);CHKERRQ(ierr); 845 PetscFunctionReturn(0); 846 } 847 848 #undef __FUNCT__ 849 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 850 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 851 { 852 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 853 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 854 int ierr,shift = C->indexshift,rank = aij->rank,size = aij->size; 855 PetscTruth isdraw,isascii,flg,isbinary; 856 PetscViewer sviewer; 857 PetscViewerFormat format; 858 859 PetscFunctionBegin; 860 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 861 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 862 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 863 if (isascii) { 864 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 865 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 866 MatInfo info; 867 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 868 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 869 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 870 if (flg) { 871 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 872 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 873 } else { 874 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 875 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 876 } 877 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 878 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 879 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 880 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 881 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 882 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 883 PetscFunctionReturn(0); 884 } else if (format == PETSC_VIEWER_ASCII_INFO) { 885 PetscFunctionReturn(0); 886 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 887 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE) 888 ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr); 889 #endif 890 #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE) 891 ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr); 892 #endif 893 894 PetscFunctionReturn(0); 895 } 896 } else 
if (isbinary) { 897 if (size == 1) { 898 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 899 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 900 } else { 901 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 902 } 903 PetscFunctionReturn(0); 904 } else if (isdraw) { 905 PetscDraw draw; 906 PetscTruth isnull; 907 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 908 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 909 } 910 911 if (size == 1) { 912 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 913 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 914 } else { 915 /* assemble the entire matrix onto first processor. */ 916 Mat A; 917 Mat_SeqAIJ *Aloc; 918 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 919 PetscScalar *a; 920 921 if (!rank) { 922 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 923 } else { 924 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 925 } 926 PetscLogObjectParent(mat,A); 927 928 /* copy over the A part */ 929 Aloc = (Mat_SeqAIJ*)aij->A->data; 930 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 931 row = aij->rstart; 932 for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;} 933 for (i=0; i<m; i++) { 934 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 935 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 936 } 937 aj = Aloc->j; 938 for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;} 939 940 /* copy over the B part */ 941 Aloc = (Mat_SeqAIJ*)aij->B->data; 942 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 943 row = aij->rstart; 944 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 945 ct = cols; 946 for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];} 947 for (i=0; i<m; i++) { 948 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 949 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 950 } 951 ierr = PetscFree(ct);CHKERRQ(ierr); 952 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 953 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 954 /* 955 Everyone has to call to draw the matrix since the graphics waits are 956 synchronized across all processors that share the PetscDraw object 957 */ 958 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 959 if (!rank) { 960 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 961 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 962 } 963 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 964 ierr = MatDestroy(A);CHKERRQ(ierr); 965 } 966 PetscFunctionReturn(0); 967 } 968 969 #undef __FUNCT__ 970 #define __FUNCT__ "MatView_MPIAIJ" 971 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 972 { 973 int ierr; 974 PetscTruth isascii,isdraw,issocket,isbinary; 975 976 PetscFunctionBegin; 977 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 978 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 979 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 980 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 981 if (isascii || isdraw || isbinary || issocket) { 982 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 983 } else { 984 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ 
matrices",((PetscObject)viewer)->type_name); 985 } 986 PetscFunctionReturn(0); 987 } 988 989 990 991 #undef __FUNCT__ 992 #define __FUNCT__ "MatRelax_MPIAIJ" 993 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 994 { 995 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 996 int ierr; 997 Vec bb1; 998 PetscScalar mone=-1.0; 999 1000 PetscFunctionBegin; 1001 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1002 1003 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1004 1005 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1006 if (flag & SOR_ZERO_INITIAL_GUESS) { 1007 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1008 its--; 1009 } 1010 1011 while (its--) { 1012 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1013 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1014 1015 /* update rhs: bb1 = bb - B*x */ 1016 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1017 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1018 1019 /* local sweep */ 1020 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1021 CHKERRQ(ierr); 1022 } 1023 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1024 if (flag & SOR_ZERO_INITIAL_GUESS) { 1025 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1026 its--; 1027 } 1028 while (its--) { 1029 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1030 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1031 1032 /* update rhs: bb1 = bb - B*x */ 1033 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1034 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1035 1036 /* local sweep */ 1037 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1038 CHKERRQ(ierr); 1039 } 1040 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1041 if (flag & SOR_ZERO_INITIAL_GUESS) { 1042 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1043 its--; 1044 } 1045 while (its--) { 1046 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1047 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1048 1049 /* update rhs: bb1 = bb - B*x */ 1050 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1051 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1052 1053 /* local sweep */ 1054 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1055 CHKERRQ(ierr); 1056 } 1057 } else { 1058 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1059 } 1060 1061 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1062 PetscFunctionReturn(0); 1063 } 1064 1065 #undef __FUNCT__ 1066 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1067 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1068 { 1069 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1070 Mat A = mat->A,B = mat->B; 1071 int ierr; 1072 PetscReal isend[5],irecv[5]; 1073 1074 PetscFunctionBegin; 1075 info->block_size = 1.0; 1076 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1077 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1078 isend[3] = info->memory; isend[4] 
= info->mallocs; 1079 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1080 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1081 isend[3] += info->memory; isend[4] += info->mallocs; 1082 if (flag == MAT_LOCAL) { 1083 info->nz_used = isend[0]; 1084 info->nz_allocated = isend[1]; 1085 info->nz_unneeded = isend[2]; 1086 info->memory = isend[3]; 1087 info->mallocs = isend[4]; 1088 } else if (flag == MAT_GLOBAL_MAX) { 1089 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1090 info->nz_used = irecv[0]; 1091 info->nz_allocated = irecv[1]; 1092 info->nz_unneeded = irecv[2]; 1093 info->memory = irecv[3]; 1094 info->mallocs = irecv[4]; 1095 } else if (flag == MAT_GLOBAL_SUM) { 1096 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1097 info->nz_used = irecv[0]; 1098 info->nz_allocated = irecv[1]; 1099 info->nz_unneeded = irecv[2]; 1100 info->memory = irecv[3]; 1101 info->mallocs = irecv[4]; 1102 } 1103 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1104 info->fill_ratio_needed = 0; 1105 info->factor_mallocs = 0; 1106 info->rows_global = (double)matin->M; 1107 info->columns_global = (double)matin->N; 1108 info->rows_local = (double)matin->m; 1109 info->columns_local = (double)matin->N; 1110 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNCT__ 1115 #define __FUNCT__ "MatSetOption_MPIAIJ" 1116 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1117 { 1118 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1119 int ierr; 1120 1121 PetscFunctionBegin; 1122 switch (op) { 1123 case MAT_NO_NEW_NONZERO_LOCATIONS: 1124 case MAT_YES_NEW_NONZERO_LOCATIONS: 1125 case MAT_COLUMNS_UNSORTED: 1126 case MAT_COLUMNS_SORTED: 1127 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1128 case MAT_KEEP_ZEROED_ROWS: 1129 case MAT_NEW_NONZERO_LOCATION_ERR: 1130 case MAT_USE_INODES: 1131 case MAT_DO_NOT_USE_INODES: 1132 case MAT_IGNORE_ZERO_ENTRIES: 1133 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1134 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1135 break; 1136 case MAT_ROW_ORIENTED: 1137 a->roworiented = PETSC_TRUE; 1138 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1139 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1140 break; 1141 case MAT_ROWS_SORTED: 1142 case MAT_ROWS_UNSORTED: 1143 case MAT_YES_NEW_DIAGONALS: 1144 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1145 break; 1146 case MAT_COLUMN_ORIENTED: 1147 a->roworiented = PETSC_FALSE; 1148 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1149 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1150 break; 1151 case MAT_IGNORE_OFF_PROC_ENTRIES: 1152 a->donotstash = PETSC_TRUE; 1153 break; 1154 case MAT_NO_NEW_DIAGONALS: 1155 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1156 default: 1157 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1158 } 1159 PetscFunctionReturn(0); 1160 } 1161 1162 #undef __FUNCT__ 1163 #define __FUNCT__ "MatGetRow_MPIAIJ" 1164 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1165 { 1166 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1167 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1168 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1169 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1170 int *cmap,*idx_p; 1171 1172 PetscFunctionBegin; 1173 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1174 mat->getrowactive = PETSC_TRUE; 1175 1176 if (!mat->rowvalues && (idx || v)) { 1177 /* 1178 allocate enough space to hold information from the longest row. 
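         A single buffer sized for the longest local row (counting both the
         diagonal and the off-diagonal block) is kept in mat->rowvalues and
         reused by every call; the column indices are stored directly after
         the values in the same allocation (mat->rowindices).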
1179 */ 1180 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1181 int max = 1,tmp; 1182 for (i=0; i<matin->m; i++) { 1183 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1184 if (max < tmp) { max = tmp; } 1185 } 1186 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1187 mat->rowindices = (int*)(mat->rowvalues + max); 1188 } 1189 1190 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1191 lrow = row - rstart; 1192 1193 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1194 if (!v) {pvA = 0; pvB = 0;} 1195 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1196 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1197 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1198 nztot = nzA + nzB; 1199 1200 cmap = mat->garray; 1201 if (v || idx) { 1202 if (nztot) { 1203 /* Sort by increasing column numbers, assuming A and B already sorted */ 1204 int imark = -1; 1205 if (v) { 1206 *v = v_p = mat->rowvalues; 1207 for (i=0; i<nzB; i++) { 1208 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1209 else break; 1210 } 1211 imark = i; 1212 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1213 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1214 } 1215 if (idx) { 1216 *idx = idx_p = mat->rowindices; 1217 if (imark > -1) { 1218 for (i=0; i<imark; i++) { 1219 idx_p[i] = cmap[cworkB[i]]; 1220 } 1221 } else { 1222 for (i=0; i<nzB; i++) { 1223 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1224 else break; 1225 } 1226 imark = i; 1227 } 1228 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1229 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1230 } 1231 } else { 1232 if (idx) *idx = 0; 1233 if (v) *v = 0; 1234 } 1235 } 1236 *nz = nztot; 1237 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1238 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1239 PetscFunctionReturn(0); 1240 } 1241 1242 #undef __FUNCT__ 1243 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1244 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1245 { 1246 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1247 1248 PetscFunctionBegin; 1249 if (aij->getrowactive == PETSC_FALSE) { 1250 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1251 } 1252 aij->getrowactive = PETSC_FALSE; 1253 PetscFunctionReturn(0); 1254 } 1255 1256 #undef __FUNCT__ 1257 #define __FUNCT__ "MatNorm_MPIAIJ" 1258 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1259 { 1260 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1261 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1262 int ierr,i,j,cstart = aij->cstart,shift = amat->indexshift; 1263 PetscReal sum = 0.0; 1264 PetscScalar *v; 1265 1266 PetscFunctionBegin; 1267 if (aij->size == 1) { 1268 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1269 } else { 1270 if (type == NORM_FROBENIUS) { 1271 v = amat->a; 1272 for (i=0; i<amat->nz; i++) { 1273 #if defined(PETSC_USE_COMPLEX) 1274 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1275 #else 1276 sum += (*v)*(*v); v++; 1277 #endif 1278 } 1279 v = bmat->a; 1280 for (i=0; i<bmat->nz; i++) { 1281 #if defined(PETSC_USE_COMPLEX) 1282 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1283 #else 1284 sum += (*v)*(*v); v++; 1285 #endif 1286 } 1287 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1288 *norm = sqrt(*norm); 1289 } else if (type == NORM_1) { /* max column norm */ 1290 
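      /* NORM_1 = max_j sum_i |a_ij|: each process accumulates its |a_ij| into a
         length-N array indexed by global column (diagonal-block entries at
         cstart + j, off-diagonal entries mapped through garray), the per-process
         arrays are summed with MPI_Allreduce, and the largest column sum wins. */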
PetscReal *tmp,*tmp2; 1291 int *jj,*garray = aij->garray; 1292 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1293 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1294 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1295 *norm = 0.0; 1296 v = amat->a; jj = amat->j; 1297 for (j=0; j<amat->nz; j++) { 1298 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1299 } 1300 v = bmat->a; jj = bmat->j; 1301 for (j=0; j<bmat->nz; j++) { 1302 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1303 } 1304 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1305 for (j=0; j<mat->N; j++) { 1306 if (tmp2[j] > *norm) *norm = tmp2[j]; 1307 } 1308 ierr = PetscFree(tmp);CHKERRQ(ierr); 1309 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1310 } else if (type == NORM_INFINITY) { /* max row norm */ 1311 PetscReal ntemp = 0.0; 1312 for (j=0; j<aij->A->m; j++) { 1313 v = amat->a + amat->i[j] + shift; 1314 sum = 0.0; 1315 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1316 sum += PetscAbsScalar(*v); v++; 1317 } 1318 v = bmat->a + bmat->i[j] + shift; 1319 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1320 sum += PetscAbsScalar(*v); v++; 1321 } 1322 if (sum > ntemp) ntemp = sum; 1323 } 1324 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1325 } else { 1326 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1327 } 1328 } 1329 PetscFunctionReturn(0); 1330 } 1331 1332 #undef __FUNCT__ 1333 #define __FUNCT__ "MatTranspose_MPIAIJ" 1334 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1335 { 1336 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1337 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1338 int ierr,shift = Aloc->indexshift; 1339 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1340 Mat B; 1341 PetscScalar *array; 1342 1343 PetscFunctionBegin; 1344 if (!matout && M != N) { 1345 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1346 } 1347 1348 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1349 1350 /* copy over the A part */ 1351 Aloc = (Mat_SeqAIJ*)a->A->data; 1352 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1353 row = a->rstart; 1354 for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;} 1355 for (i=0; i<m; i++) { 1356 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1357 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1358 } 1359 aj = Aloc->j; 1360 for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;} 1361 1362 /* copy over the B part */ 1363 Aloc = (Mat_SeqAIJ*)a->B->data; 1364 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1365 row = a->rstart; 1366 ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr); 1367 ct = cols; 1368 for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];} 1369 for (i=0; i<m; i++) { 1370 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1371 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1372 } 1373 ierr = PetscFree(ct);CHKERRQ(ierr); 1374 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1375 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1376 if (matout) { 1377 *matout = B; 1378 } else { 1379 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1380 } 1381 PetscFunctionReturn(0); 1382 } 1383 1384 #undef __FUNCT__ 1385 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1386 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1387 { 1388 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1389 Mat a = aij->A,b 
= aij->B; 1390 int ierr,s1,s2,s3; 1391 1392 PetscFunctionBegin; 1393 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1394 if (rr) { 1395 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1396 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1397 /* Overlap communication with computation. */ 1398 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1399 } 1400 if (ll) { 1401 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1402 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1403 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1404 } 1405 /* scale the diagonal block */ 1406 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1407 1408 if (rr) { 1409 /* Do a scatter end and then right scale the off-diagonal block */ 1410 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1411 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1412 } 1413 1414 PetscFunctionReturn(0); 1415 } 1416 1417 1418 #undef __FUNCT__ 1419 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1420 int MatPrintHelp_MPIAIJ(Mat A) 1421 { 1422 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1423 int ierr; 1424 1425 PetscFunctionBegin; 1426 if (!a->rank) { 1427 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1428 } 1429 PetscFunctionReturn(0); 1430 } 1431 1432 #undef __FUNCT__ 1433 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1434 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1435 { 1436 PetscFunctionBegin; 1437 *bs = 1; 1438 PetscFunctionReturn(0); 1439 } 1440 #undef __FUNCT__ 1441 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1442 int MatSetUnfactored_MPIAIJ(Mat A) 1443 { 1444 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1445 int ierr; 1446 1447 PetscFunctionBegin; 1448 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1449 PetscFunctionReturn(0); 1450 } 1451 1452 #undef __FUNCT__ 1453 #define __FUNCT__ "MatEqual_MPIAIJ" 1454 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1455 { 1456 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1457 Mat a,b,c,d; 1458 PetscTruth flg; 1459 int ierr; 1460 1461 PetscFunctionBegin; 1462 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1463 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1464 a = matA->A; b = matA->B; 1465 c = matB->A; d = matB->B; 1466 1467 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1468 if (flg == PETSC_TRUE) { 1469 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1470 } 1471 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1472 PetscFunctionReturn(0); 1473 } 1474 1475 #undef __FUNCT__ 1476 #define __FUNCT__ "MatCopy_MPIAIJ" 1477 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1478 { 1479 int ierr; 1480 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1481 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1482 PetscTruth flg; 1483 1484 PetscFunctionBegin; 1485 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1486 if (str != SAME_NONZERO_PATTERN || !flg) { 1487 /* because of the column compression in the off-processor part of the matrix a->B, 1488 the number of columns in a->B and b->B may be different, hence we cannot call 1489 the MatCopy() directly on the two parts. 
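         (The off-diagonal block stores only the columns in which it actually has
         nonzeros, renumbered 0..B->n-1 through garray, so two matrices with
         identical global sizes can still have differently sized B blocks.)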
If need be, we can provide a more 1490 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1491 then copying the submatrices */ 1492 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1493 } else { 1494 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1495 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1496 } 1497 PetscFunctionReturn(0); 1498 } 1499 1500 #undef __FUNCT__ 1501 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1502 int MatSetUpPreallocation_MPIAIJ(Mat A) 1503 { 1504 int ierr; 1505 1506 PetscFunctionBegin; 1507 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1508 PetscFunctionReturn(0); 1509 } 1510 1511 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1512 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1513 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1514 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **); 1515 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1516 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1517 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*); 1518 #endif 1519 1520 #include "petscblaslapack.h" 1521 extern int MatAXPY_SeqAIJ(PetscScalar *,Mat,Mat,MatStructure); 1522 #undef __FUNCT__ 1523 #define __FUNCT__ "MatAXPY_MPIAIJ" 1524 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1525 { 1526 int ierr,one=1; 1527 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1528 Mat_SeqAIJ *x,*y; 1529 1530 PetscFunctionBegin; 1531 if (str == SAME_NONZERO_PATTERN) { 1532 x = (Mat_SeqAIJ *)xx->A->data; 1533 y = (Mat_SeqAIJ *)yy->A->data; 1534 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1535 x = (Mat_SeqAIJ *)xx->B->data; 1536 y = (Mat_SeqAIJ *)yy->B->data; 1537 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1538 } else if (str == SUBSET_NONZERO_PATTERN) { 1539 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1540 1541 1542 int *xtoy,row,i,xcol,ycol,nz,jx,jy; 1543 x = (Mat_SeqAIJ *)xx->B->data; 1544 y = (Mat_SeqAIJ *)yy->B->data; 1545 int *xi=x->i,*yi=y->i; 1546 1547 if (y->xtoy && y->XtoY != xx->B) { 1548 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1549 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1550 } 1551 if (!y->xtoy) { /* get xtoy */ 1552 ierr = PetscMalloc(x->nz*sizeof(int),&xtoy);CHKERRQ(ierr); 1553 i = 0; 1554 for (row=0; row<xx->B->m; row++){ 1555 nz = xi[1] - xi[0]; 1556 jy = 0; 1557 for (jx=0; jx<nz; jx++,jy++){ 1558 xcol = xx->garray[x->j[*xi + jx]]; 1559 ycol = yy->garray[y->j[*yi + jy]]; 1560 while ( ycol < xcol ) { 1561 jy++; 1562 ycol = yy->garray[y->j[*yi + jy]]; 1563 } 1564 if (xcol != ycol) SETERRQ2(PETSC_ERR_ARG_WRONG,"X matrix entry (%d,%d) is not in Y matrix",row,ycol); 1565 xtoy[i++] = *yi + jy; 1566 } 1567 xi++; yi++; 1568 } 1569 y->xtoy = xtoy; /* attach xtoy to the denser matrix Y */ 1570 y->XtoY = xx->B; 1571 } 1572 1573 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1574 } else { 1575 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1576 } 1577 PetscFunctionReturn(0); 1578 } 1579 1580 /* -------------------------------------------------------------------*/ 1581 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1582 MatGetRow_MPIAIJ, 1583 MatRestoreRow_MPIAIJ, 1584 MatMult_MPIAIJ, 1585 MatMultAdd_MPIAIJ, 1586 MatMultTranspose_MPIAIJ, 1587 MatMultTransposeAdd_MPIAIJ, 1588 0, 1589 0, 1590 0, 1591 0, 1592 0, 1593 0, 1594 MatRelax_MPIAIJ, 1595 MatTranspose_MPIAIJ, 1596 MatGetInfo_MPIAIJ, 1597 MatEqual_MPIAIJ, 1598 MatGetDiagonal_MPIAIJ, 
1599 MatDiagonalScale_MPIAIJ, 1600 MatNorm_MPIAIJ, 1601 MatAssemblyBegin_MPIAIJ, 1602 MatAssemblyEnd_MPIAIJ, 1603 0, 1604 MatSetOption_MPIAIJ, 1605 MatZeroEntries_MPIAIJ, 1606 MatZeroRows_MPIAIJ, 1607 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1608 MatLUFactorSymbolic_MPIAIJ_TFS, 1609 #else 1610 0, 1611 #endif 1612 0, 1613 0, 1614 0, 1615 MatSetUpPreallocation_MPIAIJ, 1616 0, 1617 0, 1618 0, 1619 0, 1620 MatDuplicate_MPIAIJ, 1621 0, 1622 0, 1623 0, 1624 0, 1625 MatAXPY_MPIAIJ, 1626 MatGetSubMatrices_MPIAIJ, 1627 MatIncreaseOverlap_MPIAIJ, 1628 MatGetValues_MPIAIJ, 1629 MatCopy_MPIAIJ, 1630 MatPrintHelp_MPIAIJ, 1631 MatScale_MPIAIJ, 1632 0, 1633 0, 1634 0, 1635 MatGetBlockSize_MPIAIJ, 1636 0, 1637 0, 1638 0, 1639 0, 1640 MatFDColoringCreate_MPIAIJ, 1641 0, 1642 MatSetUnfactored_MPIAIJ, 1643 0, 1644 0, 1645 MatGetSubMatrix_MPIAIJ, 1646 MatDestroy_MPIAIJ, 1647 MatView_MPIAIJ, 1648 MatGetPetscMaps_Petsc, 1649 0, 1650 0, 1651 0, 1652 0, 1653 0, 1654 0, 1655 0, 1656 0, 1657 MatSetColoring_MPIAIJ, 1658 MatSetValuesAdic_MPIAIJ, 1659 MatSetValuesAdifor_MPIAIJ 1660 }; 1661 1662 /* ----------------------------------------------------------------------------------------*/ 1663 1664 EXTERN_C_BEGIN 1665 #undef __FUNCT__ 1666 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1667 int MatStoreValues_MPIAIJ(Mat mat) 1668 { 1669 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1670 int ierr; 1671 1672 PetscFunctionBegin; 1673 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1674 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1675 PetscFunctionReturn(0); 1676 } 1677 EXTERN_C_END 1678 1679 EXTERN_C_BEGIN 1680 #undef __FUNCT__ 1681 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1682 int MatRetrieveValues_MPIAIJ(Mat mat) 1683 { 1684 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1685 int ierr; 1686 1687 PetscFunctionBegin; 1688 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1689 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1690 PetscFunctionReturn(0); 1691 } 1692 EXTERN_C_END 1693 1694 #include "petscpc.h" 1695 EXTERN_C_BEGIN 1696 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1697 EXTERN_C_END 1698 1699 EXTERN_C_BEGIN 1700 #undef __FUNCT__ 1701 #define __FUNCT__ "MatCreate_MPIAIJ" 1702 int MatCreate_MPIAIJ(Mat B) 1703 { 1704 Mat_MPIAIJ *b; 1705 int ierr,i,size; 1706 1707 PetscFunctionBegin; 1708 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1709 1710 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1711 B->data = (void*)b; 1712 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1713 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1714 B->factor = 0; 1715 B->assembled = PETSC_FALSE; 1716 B->mapping = 0; 1717 1718 B->insertmode = NOT_SET_VALUES; 1719 b->size = size; 1720 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1721 1722 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1723 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1724 1725 /* the information in the maps duplicates the information computed below, eventually 1726 we should remove the duplicate information that is not contained in the maps */ 1727 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1728 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1729 1730 /* build local table of row and column ownerships */ 1731 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1732 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1733 b->cowners = b->rowners + 
b->size + 2; 1734 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1735 b->rowners[0] = 0; 1736 for (i=2; i<=b->size; i++) { 1737 b->rowners[i] += b->rowners[i-1]; 1738 } 1739 b->rstart = b->rowners[b->rank]; 1740 b->rend = b->rowners[b->rank+1]; 1741 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1742 b->cowners[0] = 0; 1743 for (i=2; i<=b->size; i++) { 1744 b->cowners[i] += b->cowners[i-1]; 1745 } 1746 b->cstart = b->cowners[b->rank]; 1747 b->cend = b->cowners[b->rank+1]; 1748 1749 /* build cache for off array entries formed */ 1750 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1751 b->donotstash = PETSC_FALSE; 1752 b->colmap = 0; 1753 b->garray = 0; 1754 b->roworiented = PETSC_TRUE; 1755 1756 /* stuff used for matrix vector multiply */ 1757 b->lvec = PETSC_NULL; 1758 b->Mvctx = PETSC_NULL; 1759 1760 /* stuff for MatGetRow() */ 1761 b->rowindices = 0; 1762 b->rowvalues = 0; 1763 b->getrowactive = PETSC_FALSE; 1764 1765 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1766 "MatStoreValues_MPIAIJ", 1767 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1768 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1769 "MatRetrieveValues_MPIAIJ", 1770 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1771 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1772 "MatGetDiagonalBlock_MPIAIJ", 1773 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1774 1775 PetscFunctionReturn(0); 1776 } 1777 EXTERN_C_END 1778 1779 #undef __FUNCT__ 1780 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1781 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1782 { 1783 Mat mat; 1784 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1785 int ierr; 1786 1787 PetscFunctionBegin; 1788 *newmat = 0; 1789 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1790 ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1791 a = (Mat_MPIAIJ*)mat->data; 1792 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1793 mat->factor = matin->factor; 1794 mat->assembled = PETSC_TRUE; 1795 mat->insertmode = NOT_SET_VALUES; 1796 mat->preallocated = PETSC_TRUE; 1797 1798 a->rstart = oldmat->rstart; 1799 a->rend = oldmat->rend; 1800 a->cstart = oldmat->cstart; 1801 a->cend = oldmat->cend; 1802 a->size = oldmat->size; 1803 a->rank = oldmat->rank; 1804 a->donotstash = oldmat->donotstash; 1805 a->roworiented = oldmat->roworiented; 1806 a->rowindices = 0; 1807 a->rowvalues = 0; 1808 a->getrowactive = PETSC_FALSE; 1809 1810 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1811 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1812 if (oldmat->colmap) { 1813 #if defined (PETSC_USE_CTABLE) 1814 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1815 #else 1816 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1817 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1818 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1819 #endif 1820 } else a->colmap = 0; 1821 if (oldmat->garray) { 1822 int len; 1823 len = oldmat->B->n; 1824 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1825 PetscLogObjectMemory(mat,len*sizeof(int)); 1826 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1827 } else a->garray = 0; 1828 1829 ierr = 
VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1830 PetscLogObjectParent(mat,a->lvec); 1831 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1832 PetscLogObjectParent(mat,a->Mvctx); 1833 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1834 PetscLogObjectParent(mat,a->A); 1835 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1836 PetscLogObjectParent(mat,a->B); 1837 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1838 *newmat = mat; 1839 PetscFunctionReturn(0); 1840 } 1841 1842 #include "petscsys.h" 1843 1844 EXTERN_C_BEGIN 1845 #undef __FUNCT__ 1846 #define __FUNCT__ "MatLoad_MPIAIJ" 1847 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat) 1848 { 1849 Mat A; 1850 PetscScalar *vals,*svals; 1851 MPI_Comm comm = ((PetscObject)viewer)->comm; 1852 MPI_Status status; 1853 int i,nz,ierr,j,rstart,rend,fd; 1854 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1855 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1856 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1857 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_SUPERLUDIST) 1858 PetscTruth flag; 1859 #endif 1860 1861 PetscFunctionBegin; 1862 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1863 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1864 if (!rank) { 1865 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1866 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1867 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1868 if (header[3] < 0) { 1869 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1870 } 1871 } 1872 1873 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1874 M = header[1]; N = header[2]; 1875 /* determine ownership of all rows */ 1876 m = M/size + ((M % size) > rank); 1877 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1878 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1879 rowners[0] = 0; 1880 for (i=2; i<=size; i++) { 1881 rowners[i] += rowners[i-1]; 1882 } 1883 rstart = rowners[rank]; 1884 rend = rowners[rank+1]; 1885 1886 /* distribute row lengths to all processors */ 1887 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1888 offlens = ourlens + (rend-rstart); 1889 if (!rank) { 1890 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1891 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1892 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1893 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1894 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1895 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1896 } else { 1897 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1898 } 1899 1900 if (!rank) { 1901 /* calculate the number of nonzeros on each processor */ 1902 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1903 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1904 for (i=0; i<size; i++) { 1905 for (j=rowners[i]; j< rowners[i+1]; j++) { 1906 procsnz[i] += rowlengths[j]; 1907 } 1908 } 1909 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1910 1911 /* determine max buffer needed and allocate it */ 1912 maxnz = 0; 1913 for (i=0; i<size; i++) { 1914 maxnz = PetscMax(maxnz,procsnz[i]); 1915 } 1916 ierr = 
PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1917 1918 /* read in my part of the matrix column indices */ 1919 nz = procsnz[0]; 1920 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1921 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1922 1923 /* read in every one elses and ship off */ 1924 for (i=1; i<size; i++) { 1925 nz = procsnz[i]; 1926 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1927 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1928 } 1929 ierr = PetscFree(cols);CHKERRQ(ierr); 1930 } else { 1931 /* determine buffer space needed for message */ 1932 nz = 0; 1933 for (i=0; i<m; i++) { 1934 nz += ourlens[i]; 1935 } 1936 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 1937 1938 /* receive message of column indices*/ 1939 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1940 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1941 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1942 } 1943 1944 /* determine column ownership if matrix is not square */ 1945 if (N != M) { 1946 n = N/size + ((N % size) > rank); 1947 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1948 cstart = cend - n; 1949 } else { 1950 cstart = rstart; 1951 cend = rend; 1952 n = cend - cstart; 1953 } 1954 1955 /* loop over local rows, determining number of off diagonal entries */ 1956 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 1957 jj = 0; 1958 for (i=0; i<m; i++) { 1959 for (j=0; j<ourlens[i]; j++) { 1960 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1961 jj++; 1962 } 1963 } 1964 1965 /* create our matrix */ 1966 for (i=0; i<m; i++) { 1967 ourlens[i] -= offlens[i]; 1968 } 1969 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1970 A = *newmat; 1971 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1972 for (i=0; i<m; i++) { 1973 ourlens[i] += offlens[i]; 1974 } 1975 1976 if (!rank) { 1977 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1978 1979 /* read in my part of the matrix numerical values */ 1980 nz = procsnz[0]; 1981 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1982 1983 /* insert into matrix */ 1984 jj = rstart; 1985 smycols = mycols; 1986 svals = vals; 1987 for (i=0; i<m; i++) { 1988 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1989 smycols += ourlens[i]; 1990 svals += ourlens[i]; 1991 jj++; 1992 } 1993 1994 /* read in other processors and ship out */ 1995 for (i=1; i<size; i++) { 1996 nz = procsnz[i]; 1997 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1998 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 1999 } 2000 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2001 } else { 2002 /* receive numeric values */ 2003 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2004 2005 /* receive message of values*/ 2006 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2007 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2008 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2009 2010 /* insert into matrix */ 2011 jj = rstart; 2012 smycols = mycols; 2013 svals = vals; 2014 for (i=0; i<m; i++) { 2015 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2016 smycols += ourlens[i]; 2017 svals += ourlens[i]; 2018 jj++; 2019 } 2020 } 2021 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2022 ierr = 
PetscFree(vals);CHKERRQ(ierr); 2023 ierr = PetscFree(mycols);CHKERRQ(ierr); 2024 ierr = PetscFree(rowners);CHKERRQ(ierr); 2025 2026 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2027 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2028 #if defined(PETSC_HAVE_SPOOLES) 2029 ierr = PetscOptionsHasName(A->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr); 2030 if (flag) { 2031 if (size == 1) { 2032 ierr = MatUseSpooles_SeqAIJ(A);CHKERRQ(ierr); 2033 } else { 2034 ierr = MatUseSpooles_MPIAIJ(A);CHKERRQ(ierr); 2035 } 2036 } 2037 #endif 2038 #if defined(PETSC_HAVE_SUPERLUDIST) 2039 ierr = PetscOptionsHasName(A->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr); 2040 if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr); } 2041 #endif 2042 PetscFunctionReturn(0); 2043 } 2044 EXTERN_C_END 2045 2046 #undef __FUNCT__ 2047 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2048 /* 2049 Not great since it makes two copies of the submatrix, first an SeqAIJ 2050 in local and then by concatenating the local matrices the end result. 2051 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2052 */ 2053 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2054 { 2055 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2056 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2057 Mat *local,M,Mreuse; 2058 PetscScalar *vwork,*aa; 2059 MPI_Comm comm = mat->comm; 2060 Mat_SeqAIJ *aij; 2061 2062 2063 PetscFunctionBegin; 2064 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2065 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2066 2067 if (call == MAT_REUSE_MATRIX) { 2068 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2069 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 2070 local = &Mreuse; 2071 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2072 } else { 2073 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2074 Mreuse = *local; 2075 ierr = PetscFree(local);CHKERRQ(ierr); 2076 } 2077 2078 /* 2079 m - number of local rows 2080 n - number of columns (same on all processors) 2081 rstart - first row in new global matrix generated 2082 */ 2083 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2084 if (call == MAT_INITIAL_MATRIX) { 2085 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2086 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 2087 ii = aij->i; 2088 jj = aij->j; 2089 2090 /* 2091 Determine the number of non-zeros in the diagonal and off-diagonal 2092 portions of the matrix in order to do correct preallocation 2093 */ 2094 2095 /* first get start and end of "diagonal" columns */ 2096 if (csize == PETSC_DECIDE) { 2097 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2098 if (mglobal == n) { /* square matrix */ 2099 nlocal = m; 2100 } else { 2101 nlocal = n/size + ((n % size) > rank); 2102 } 2103 } else { 2104 nlocal = csize; 2105 } 2106 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2107 rstart = rend - nlocal; 2108 if (rank == size - 1 && rend != n) { 2109 SETERRQ(1,"Local column sizes do not add up to total number of columns"); 2110 } 2111 2112 /* next, compute all the lengths */ 2113 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2114 olens = dlens + m; 2115 for (i=0; i<m; i++) { 2116 jend = ii[i+1] - ii[i]; 2117 olen = 0; 2118 dlen = 0; 2119 for (j=0; j<jend; j++) { 2120 if (*jj < rstart || *jj >= rend) olen++; 2121 
else dlen++; 2122 jj++; 2123 } 2124 olens[i] = olen; 2125 dlens[i] = dlen; 2126 } 2127 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2128 ierr = PetscFree(dlens);CHKERRQ(ierr); 2129 } else { 2130 int ml,nl; 2131 2132 M = *newmat; 2133 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2134 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2135 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2136 /* 2137 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2138 rather than the slower MatSetValues(). 2139 */ 2140 M->was_assembled = PETSC_TRUE; 2141 M->assembled = PETSC_FALSE; 2142 } 2143 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2144 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2145 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 2146 ii = aij->i; 2147 jj = aij->j; 2148 aa = aij->a; 2149 for (i=0; i<m; i++) { 2150 row = rstart + i; 2151 nz = ii[i+1] - ii[i]; 2152 cwork = jj; jj += nz; 2153 vwork = aa; aa += nz; 2154 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2155 } 2156 2157 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2158 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2159 *newmat = M; 2160 2161 /* save submatrix used in processor for next request */ 2162 if (call == MAT_INITIAL_MATRIX) { 2163 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2164 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2165 } 2166 2167 PetscFunctionReturn(0); 2168 } 2169 2170 #undef __FUNCT__ 2171 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2172 /*@C 2173 MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format 2174 (the default parallel PETSc format). For good matrix assembly performance 2175 the user should preallocate the matrix storage by setting the parameters 2176 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2177 performance can be increased by more than a factor of 50. 2178 2179 Collective on MPI_Comm 2180 2181 Input Parameters: 2182 + A - the matrix 2183 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2184 (same value is used for all local rows) 2185 . d_nnz - array containing the number of nonzeros in the various rows of the 2186 DIAGONAL portion of the local submatrix (possibly different for each row) 2187 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2188 The size of this array is equal to the number of local rows, i.e 'm'. 2189 You must leave room for the diagonal entry even if it is zero. 2190 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2191 submatrix (same value is used for all local rows). 2192 - o_nnz - array containing the number of nonzeros in the various rows of the 2193 OFF-DIAGONAL portion of the local submatrix (possibly different for 2194 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2195 structure. The size of this array is equal to the number 2196 of local rows, i.e 'm'. 2197 2198 The AIJ format (also called the Yale sparse matrix format or 2199 compressed row storage), is fully compatible with standard Fortran 77 2200 storage. That is, the stored row and column indices can begin at 2201 either one (as in Fortran) or zero. See the users manual for details. 2202 2203 The user MUST specify either the local or global matrix dimensions 2204 (possibly both). 
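   For illustration, a minimal calling sequence is sketched below; the sizes and the
   preallocation figures (5 and 2) are placeholders only and should be replaced by
   estimates appropriate to the application (comm, m, n, M, N, and A are assumed to be
   declared by the caller):

.vb
   ierr = MatCreate(comm,m,n,M,N,&A);
   ierr = MatSetType(A,MATMPIAIJ);
   ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);
   ... set entries with MatSetValues() ...
   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve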
2205 2206 The parallel matrix is partitioned such that the first m0 rows belong to 2207 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2208 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 2209 2210 The DIAGONAL portion of the local submatrix of a processor can be defined 2211 as the submatrix which is obtained by extracting the part corresponding 2212 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2213 first row that belongs to the processor, and r2 is the last row belonging 2214 to this processor. This is a square mxm matrix. The remaining portion 2215 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2216 2217 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2218 2219 By default, this format uses inodes (identical nodes) when possible. 2220 We search for consecutive rows with the same nonzero structure, thereby 2221 reusing matrix information to achieve increased efficiency. 2222 2223 Options Database Keys: 2224 + -mat_aij_no_inode - Do not use inodes 2225 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2226 - -mat_aij_oneindex - Internally use indexing starting at 1 2227 rather than 0. Note that when calling MatSetValues(), 2228 the user still MUST index entries starting at 0! 2229 2230 Example usage: 2231 2232 Consider the following 8x8 matrix with 34 non-zero values, that is 2233 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2234 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2235 as follows: 2236 2237 .vb 2238 1 2 0 | 0 3 0 | 0 4 2239 Proc0 0 5 6 | 7 0 0 | 8 0 2240 9 0 10 | 11 0 0 | 12 0 2241 ------------------------------------- 2242 13 0 14 | 15 16 17 | 0 0 2243 Proc1 0 18 0 | 19 20 21 | 0 0 2244 0 0 0 | 22 23 0 | 24 0 2245 ------------------------------------- 2246 Proc2 25 26 27 | 0 0 28 | 29 0 2247 30 0 0 | 31 32 33 | 0 34 2248 .ve 2249 2250 This can be represented as a collection of submatrices as: 2251 2252 .vb 2253 A B C 2254 D E F 2255 G H I 2256 .ve 2257 2258 Where the submatrices A,B,C are owned by proc0, D,E,F are 2259 owned by proc1, G,H,I are owned by proc2. 2260 2261 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2262 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2263 The 'M','N' parameters are 8,8, and have the same values on all procs. 2264 2265 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2266 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2267 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2268 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2269 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 2270 matrix, and [DF] as another SeqAIJ matrix. 2271 2272 When d_nz, o_nz parameters are specified, d_nz storage elements are 2273 allocated for every row of the local diagonal submatrix, and o_nz 2274 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2275 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2276 local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices. 2277 In this case, the values of d_nz,o_nz are: 2278 .vb 2279 proc0 : d_nz = 2, o_nz = 2 2280 proc1 : d_nz = 3, o_nz = 2 2281 proc2 : d_nz = 1, o_nz = 4 2282 .ve 2283 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2284 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2285 for proc2.
i.e we are using 12+15+10=37 storage locations to store 2286 34 values. 2287 2288 When d_nnz, o_nnz parameters are specified, the storage is specified 2289 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2290 In the above case the values for d_nnz,o_nnz are: 2291 .vb 2292 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2293 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2294 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2295 .ve 2296 Here the space allocated is sum of all the above values i.e 34, and 2297 hence pre-allocation is perfect. 2298 2299 Level: intermediate 2300 2301 .keywords: matrix, aij, compressed row, sparse, parallel 2302 2303 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2304 @*/ 2305 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 2306 { 2307 Mat_MPIAIJ *b; 2308 int ierr,i; 2309 PetscTruth flg2; 2310 2311 PetscFunctionBegin; 2312 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr); 2313 if (!flg2) PetscFunctionReturn(0); 2314 B->preallocated = PETSC_TRUE; 2315 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2316 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2317 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 2318 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 2319 if (d_nnz) { 2320 for (i=0; i<B->m; i++) { 2321 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 2322 } 2323 } 2324 if (o_nnz) { 2325 for (i=0; i<B->m; i++) { 2326 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 2327 } 2328 } 2329 b = (Mat_MPIAIJ*)B->data; 2330 2331 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2332 PetscLogObjectParent(B,b->A); 2333 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2334 PetscLogObjectParent(B,b->B); 2335 2336 PetscFunctionReturn(0); 2337 } 2338 2339 #undef __FUNCT__ 2340 #define __FUNCT__ "MatCreateMPIAIJ" 2341 /*@C 2342 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2343 (the default parallel PETSc format). For good matrix assembly performance 2344 the user should preallocate the matrix storage by setting the parameters 2345 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2346 performance can be increased by more than a factor of 50. 2347 2348 Collective on MPI_Comm 2349 2350 Input Parameters: 2351 + comm - MPI communicator 2352 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2353 This value should be the same as the local size used in creating the 2354 y vector for the matrix-vector product y = Ax. 2355 . n - This value should be the same as the local size used in creating the 2356 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2357 calculated if N is given) For square matrices n is almost always m. 2358 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2359 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2360 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2361 (same value is used for all local rows) 2362 . 
d_nnz - array containing the number of nonzeros in the various rows of the 2363 DIAGONAL portion of the local submatrix (possibly different for each row) 2364 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2365 The size of this array is equal to the number of local rows, i.e., 'm'. 2366 You must leave room for the diagonal entry even if it is zero. 2367 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2368 submatrix (same value is used for all local rows). 2369 - o_nnz - array containing the number of nonzeros in the various rows of the 2370 OFF-DIAGONAL portion of the local submatrix (possibly different for 2371 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2372 structure. The size of this array is equal to the number 2373 of local rows, i.e., 'm'. 2374 2375 Output Parameter: 2376 . A - the matrix 2377 2378 Notes: 2379 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 2380 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2381 storage requirements for this matrix. 2382 2383 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2384 processor then it must be used on all processors that share the object for 2385 that argument. 2386 2387 The AIJ format (also called the Yale sparse matrix format or 2388 compressed row storage) is fully compatible with standard Fortran 77 2389 storage. That is, the stored row and column indices can begin at 2390 either one (as in Fortran) or zero. See the users manual for details. 2391 2392 The user MUST specify either the local or global matrix dimensions 2393 (possibly both). 2394 2395 The parallel matrix is partitioned such that the first m0 rows belong to 2396 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2397 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 2398 2399 The DIAGONAL portion of the local submatrix of a processor can be defined 2400 as the submatrix which is obtained by extracting the part corresponding 2401 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2402 first row that belongs to the processor, and r2 is the last row belonging 2403 to this processor. This is a square mxm matrix. The remaining portion 2404 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2405 2406 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2407 2408 By default, this format uses inodes (identical nodes) when possible. 2409 We search for consecutive rows with the same nonzero structure, thereby 2410 reusing matrix information to achieve increased efficiency. 2411 2412 Options Database Keys: 2413 + -mat_aij_no_inode - Do not use inodes 2414 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2415 - -mat_aij_oneindex - Internally use indexing starting at 1 2416 rather than 0. Note that when calling MatSetValues(), 2417 the user still MUST index entries starting at 0! 2418 2419 2420 Example usage: 2421 2422 Consider the following 8x8 matrix with 34 non-zero values, that is 2423 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2424 proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown 2425 as follows: 2426 2427 .vb 2428 1 2 0 | 0 3 0 | 0 4 2429 Proc0 0 5 6 | 7 0 0 | 8 0 2430 9 0 10 | 11 0 0 | 12 0 2431 ------------------------------------- 2432 13 0 14 | 15 16 17 | 0 0 2433 Proc1 0 18 0 | 19 20 21 | 0 0 2434 0 0 0 | 22 23 0 | 24 0 2435 ------------------------------------- 2436 Proc2 25 26 27 | 0 0 28 | 29 0 2437 30 0 0 | 31 32 33 | 0 34 2438 .ve 2439 2440 This can be represented as a collection of submatrices as: 2441 2442 .vb 2443 A B C 2444 D E F 2445 G H I 2446 .ve 2447 2448 Where the submatrices A,B,C are owned by proc0, D,E,F are 2449 owned by proc1, G,H,I are owned by proc2. 2450 2451 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2452 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2453 The 'M','N' parameters are 8,8, and have the same values on all procs. 2454 2455 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2456 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2457 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2458 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2459 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 2460 matrix, and [DF] as another SeqAIJ matrix. 2461 2462 When d_nz, o_nz parameters are specified, d_nz storage elements are 2463 allocated for every row of the local diagonal submatrix, and o_nz 2464 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2465 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2466 local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices. 2467 In this case, the values of d_nz,o_nz are: 2468 .vb 2469 proc0 : d_nz = 2, o_nz = 2 2470 proc1 : d_nz = 3, o_nz = 2 2471 proc2 : d_nz = 1, o_nz = 4 2472 .ve 2473 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2474 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2475 for proc2, i.e., we are using 12+15+10=37 storage locations to store 2476 34 values. 2477 2478 When d_nnz, o_nnz parameters are specified, the storage is specified 2479 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2480 In the above case the values for d_nnz,o_nnz are: 2481 .vb 2482 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2483 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2484 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2485 .ve 2486 Here the space allocated is the sum of all the above values, i.e., 34, and 2487 hence the preallocation is perfect.
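   As an illustration only, the call on proc0 for the example above could be written
   as follows (error checking omitted; comm and A are assumed to be declared by the
   caller, and proc1 and proc2 pass their own local sizes and nonzero counts in the
   same way):

.vb
   int d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
   ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve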
2488 2489 Level: intermediate 2490 2491 .keywords: matrix, aij, compressed row, sparse, parallel 2492 2493 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2494 @*/ 2495 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2496 { 2497 int ierr,size; 2498 2499 PetscFunctionBegin; 2500 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2501 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2502 if (size > 1) { 2503 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2504 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2505 } else { 2506 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2507 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2508 } 2509 PetscFunctionReturn(0); 2510 } 2511 2512 #undef __FUNCT__ 2513 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2514 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap) 2515 { 2516 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2517 PetscFunctionBegin; 2518 *Ad = a->A; 2519 *Ao = a->B; 2520 *colmap = a->garray; 2521 PetscFunctionReturn(0); 2522 } 2523 2524 #undef __FUNCT__ 2525 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2526 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2527 { 2528 int ierr,i; 2529 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2530 2531 PetscFunctionBegin; 2532 if (coloring->ctype == IS_COLORING_LOCAL) { 2533 ISColoringValue *allcolors,*colors; 2534 ISColoring ocoloring; 2535 2536 /* set coloring for diagonal portion */ 2537 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2538 2539 /* set coloring for off-diagonal portion */ 2540 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2541 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2542 for (i=0; i<a->B->n; i++) { 2543 colors[i] = allcolors[a->garray[i]]; 2544 } 2545 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2546 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2547 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2548 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2549 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2550 ISColoringValue *colors; 2551 int *larray; 2552 ISColoring ocoloring; 2553 2554 /* set coloring for diagonal portion */ 2555 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2556 for (i=0; i<a->A->n; i++) { 2557 larray[i] = i + a->cstart; 2558 } 2559 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2560 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2561 for (i=0; i<a->A->n; i++) { 2562 colors[i] = coloring->colors[larray[i]]; 2563 } 2564 ierr = PetscFree(larray);CHKERRQ(ierr); 2565 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2566 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2567 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2568 2569 /* set coloring for off-diagonal portion */ 2570 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2571 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2572 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2573 for (i=0; i<a->B->n; i++) { 2574 colors[i] = coloring->colors[larray[i]]; 2575 } 2576 ierr = PetscFree(larray);CHKERRQ(ierr); 2577 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2578 ierr = 
MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2579 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2580 } else { 2581 SETERRQ1(1,"No support ISColoringType %d",coloring->ctype); 2582 } 2583 2584 PetscFunctionReturn(0); 2585 } 2586 2587 #undef __FUNCT__ 2588 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2589 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2590 { 2591 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2592 int ierr; 2593 2594 PetscFunctionBegin; 2595 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2596 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2597 PetscFunctionReturn(0); 2598 } 2599 2600 #undef __FUNCT__ 2601 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2602 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2603 { 2604 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2605 int ierr; 2606 2607 PetscFunctionBegin; 2608 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2609 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2610 PetscFunctionReturn(0); 2611 } 2612 2613 #undef __FUNCT__ 2614 #define __FUNCT__ "MatMerge" 2615 /*@C 2616 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2617 matrices from each processor 2618 2619 Collective on MPI_Comm 2620 2621 Input Parameters: 2622 + comm - the communicators the parallel matrix will live on 2623 - inmat - the input sequential matrices 2624 2625 Output Parameter: 2626 . outmat - the parallel matrix generated 2627 2628 Level: advanced 2629 2630 Notes: The number of columns of the matrix in EACH of the seperate files 2631 MUST be the same. 2632 2633 @*/ 2634 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat) 2635 { 2636 int ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz; 2637 PetscScalar *values; 2638 PetscMap columnmap,rowmap; 2639 2640 PetscFunctionBegin; 2641 2642 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr); 2643 2644 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2645 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2646 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr); 2647 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2648 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2649 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2650 2651 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2652 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2653 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2654 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2655 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2656 2657 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2658 for (i=0;i<m;i++) { 2659 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2660 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2661 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2662 } 2663 ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr); 2664 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2665 2666 for (i=0;i<m;i++) { 2667 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2668 I = i + rstart; 2669 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2670 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2671 } 2672 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2673 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2674 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2675 2676 PetscFunctionReturn(0); 2677 } 2678 2679 
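/*
   A usage sketch for MatMerge(); the communicator, local sizes, and preallocation
   value below are placeholders, not part of the library. Each process assembles a
   sequential matrix holding the rows it will own in the merged parallel matrix
   (all with the same global number of columns N), then all processes call
   MatMerge() collectively. Note that MatMerge() destroys the input matrix.

      Mat Aloc,Apar;
      ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,N,0,PETSC_NULL,&Aloc);
      ... fill Aloc with MatSetValues() and assemble it ...
      ierr = MatMerge(PETSC_COMM_WORLD,Aloc,&Apar);
*/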
#undef __FUNCT__ 2680 #define __FUNCT__ "MatFileSplit" 2681 int MatFileSplit(Mat A,char *outfile) 2682 { 2683 int ierr,rank,len,m,N,i,rstart,*indx,nnz; 2684 PetscViewer out; 2685 char *name; 2686 Mat B; 2687 PetscScalar *values; 2688 2689 PetscFunctionBegin; 2690 2691 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2692 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2693 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr); 2694 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2695 for (i=0;i<m;i++) { 2696 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2697 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2698 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2699 } 2700 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2701 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2702 2703 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2704 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2705 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2706 sprintf(name,"%s.%d",outfile,rank); 2707 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr); 2708 ierr = PetscFree(name); 2709 ierr = MatView(B,out);CHKERRQ(ierr); 2710 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2711 ierr = MatDestroy(B);CHKERRQ(ierr); 2712 PetscFunctionReturn(0); 2713 } 2714
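/*
   A usage sketch for MatFileSplit(); the file name is a placeholder. Given an
   assembled parallel matrix A, each process writes its local rows (stored as a
   sequential matrix with the full global column space) to a separate binary file
   named <outfile>.<rank>:

      ierr = MatFileSplit(A,"matrix.dat");

   which, on two processes, produces the files matrix.dat.0 and matrix.dat.1.
*/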