1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/ 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/spops.h" 6 7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat); 8 EXTERN int DisAssemble_MPIAIJ(Mat); 9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 12 EXTERN int MatPrintHelp_SeqAIJ(Mat); 13 14 /* 15 Local utility routine that creates a mapping from the global column 16 number to the local number in the off-diagonal part of the local 17 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 18 a slightly higher hash table cost; without it it is not scalable (each processor 19 has an order N integer array but is fast to acess. 20 */ 21 #undef __FUNCT__ 22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 23 int CreateColmap_MPIAIJ_Private(Mat mat) 24 { 25 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 26 int n = aij->B->n,i,ierr; 27 28 PetscFunctionBegin; 29 #if defined (PETSC_USE_CTABLE) 30 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 31 for (i=0; i<n; i++){ 32 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 33 } 34 #else 35 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 36 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 37 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 38 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 39 #endif 40 PetscFunctionReturn(0); 41 } 42 43 #define CHUNKSIZE 15 44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 45 { \ 46 \ 47 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 48 rmax = aimax[row]; nrow = ailen[row]; \ 49 col1 = col - shift; \ 50 \ 51 low = 0; high = nrow; \ 52 while (high-low > 5) { \ 53 t = (low+high)/2; \ 54 if (rp[t] > col) high = t; \ 55 else low = t; \ 56 } \ 57 for (_i=low; _i<high; _i++) { \ 58 if (rp[_i] > col1) break; \ 59 if (rp[_i] == col1) { \ 60 if (addv == ADD_VALUES) ap[_i] += value; \ 61 else ap[_i] = value; \ 62 goto a_noinsert; \ 63 } \ 64 } \ 65 if (nonew == 1) goto a_noinsert; \ 66 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 67 if (nrow >= rmax) { \ 68 /* there is no extra room in row, therefore enlarge */ \ 69 int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 70 PetscScalar *new_a; \ 71 \ 72 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 73 \ 74 /* malloc new storage space */ \ 75 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \ 76 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 77 new_j = (int*)(new_a + new_nz); \ 78 new_i = new_j + new_nz; \ 79 \ 80 /* copy over old data into new slots */ \ 81 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 82 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 83 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 84 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 85 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 86 len*sizeof(int));CHKERRQ(ierr); \ 87 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 88 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 89 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 90 /* free up old matrix storage */ \ 91 \ 92 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 93 if (!a->singlemalloc) { \ 94 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 95 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 96 } \ 97 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 98 a->singlemalloc = PETSC_TRUE; \ 99 \ 100 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 101 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 102 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 103 a->maxnz += CHUNKSIZE; \ 104 a->reallocs++; \ 105 } \ 106 N = nrow++ - 1; a->nz++; \ 107 /* shift up all the later entries in this row */ \ 108 for (ii=N; ii>=_i; ii--) { \ 109 rp[ii+1] = rp[ii]; \ 110 ap[ii+1] = ap[ii]; \ 111 } \ 112 rp[_i] = col1; \ 113 ap[_i] = value; \ 114 a_noinsert: ; \ 115 ailen[row] = nrow; \ 116 } 117 118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 119 { \ 120 \ 121 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 122 rmax = bimax[row]; nrow = bilen[row]; \ 123 col1 = col - shift; \ 124 \ 125 low = 0; high = nrow; \ 126 while (high-low > 5) { \ 127 t = (low+high)/2; \ 128 if (rp[t] > col) high = t; \ 129 else low = t; \ 130 } \ 131 for (_i=low; _i<high; _i++) { \ 132 if (rp[_i] > col1) break; \ 133 if (rp[_i] == col1) { \ 134 if (addv == ADD_VALUES) ap[_i] += value; \ 135 else ap[_i] = value; \ 136 goto b_noinsert; \ 137 } \ 138 } \ 139 if (nonew == 1) goto b_noinsert; \ 140 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 141 if (nrow >= rmax) { \ 142 /* there is no extra room in row, therefore enlarge */ \ 143 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 144 PetscScalar *new_a; \ 145 \ 146 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 147 \ 148 /* malloc new storage space */ \ 149 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 150 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 151 new_j = (int*)(new_a + new_nz); \ 152 new_i = new_j + new_nz; \ 153 \ 154 /* copy over old data into new slots */ \ 155 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 156 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 157 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 158 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 159 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 160 len*sizeof(int));CHKERRQ(ierr); \ 161 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 163 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 164 /* free up old matrix storage */ \ 165 \ 166 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 167 if (!b->singlemalloc) { \ 168 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 169 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 170 } \ 171 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 172 b->singlemalloc = PETSC_TRUE; \ 173 \ 174 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 175 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 176 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 177 b->maxnz += CHUNKSIZE; \ 178 b->reallocs++; \ 179 } \ 180 N = nrow++ - 1; b->nz++; \ 181 /* shift up all the later entries in this row */ \ 182 for (ii=N; ii>=_i; ii--) { \ 183 rp[ii+1] = rp[ii]; \ 184 ap[ii+1] = ap[ii]; \ 185 } \ 186 rp[_i] = col1; \ 187 ap[_i] = value; \ 188 b_noinsert: ; \ 189 bilen[row] = nrow; \ 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatSetValues_MPIAIJ" 194 int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 195 { 196 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 197 PetscScalar value; 198 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 199 int cstart = aij->cstart,cend = aij->cend,row,col; 200 PetscTruth roworiented = aij->roworiented; 201 202 /* Some Variables required in the macro */ 203 Mat A = aij->A; 204 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 205 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 206 PetscScalar *aa = a->a; 207 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 208 Mat B = aij->B; 209 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 210 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 211 PetscScalar *ba = b->a; 212 213 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 214 int nonew = a->nonew,shift=0; 215 PetscScalar *ap; 216 217 PetscFunctionBegin; 218 for (i=0; i<m; i++) { 219 if (im[i] < 0) continue; 220 #if defined(PETSC_USE_BOPT_g) 221 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 222 #endif 223 if (im[i] >= rstart && im[i] < rend) { 224 row = im[i] - rstart; 225 for (j=0; j<n; j++) { 226 if (in[j] >= cstart && in[j] < cend){ 227 col = in[j] - cstart; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 if (ignorezeroentries && value == 0.0) continue; 230 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } else if (in[j] < 0) continue; 233 #if defined(PETSC_USE_BOPT_g) 234 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 235 #endif 236 else { 237 if (mat->was_assembled) { 238 if (!aij->colmap) { 239 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 240 } 241 #if defined (PETSC_USE_CTABLE) 242 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 243 col--; 244 #else 245 col = aij->colmap[in[j]] - 1; 246 #endif 247 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 248 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 249 col = in[j]; 250 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 251 B = aij->B; 252 b = (Mat_SeqAIJ*)B->data; 253 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 254 ba = b->a; 255 } 256 } else col = in[j]; 257 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 258 if (ignorezeroentries && value == 0.0) continue; 259 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 260 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 261 } 262 } 263 } else { 264 if (!aij->donotstash) { 265 if (roworiented) { 266 if (ignorezeroentries && v[i*n] == 0.0) continue; 267 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 268 } else { 269 if (ignorezeroentries && v[i] == 0.0) continue; 270 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 271 } 272 } 273 } 274 } 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatGetValues_MPIAIJ" 280 int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 281 { 282 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 283 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 284 int cstart = aij->cstart,cend = aij->cend,row,col; 285 286 PetscFunctionBegin; 287 for (i=0; i<m; i++) { 288 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 289 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 290 if (idxm[i] >= rstart && idxm[i] < rend) { 291 row = idxm[i] - rstart; 292 for (j=0; j<n; j++) { 293 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]); 294 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 295 if (idxn[j] >= cstart && idxn[j] < cend){ 296 col = idxn[j] - cstart; 297 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 298 } else { 299 if (!aij->colmap) { 300 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 301 } 302 #if defined (PETSC_USE_CTABLE) 303 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 304 col --; 305 #else 306 col = aij->colmap[idxn[j]] - 1; 307 #endif 308 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 309 else { 310 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 311 } 312 } 313 } 314 } else { 315 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 316 } 317 } 318 PetscFunctionReturn(0); 319 } 320 321 #undef __FUNCT__ 322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 324 { 325 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 326 int ierr,nstash,reallocs; 327 InsertMode addv; 328 329 PetscFunctionBegin; 330 if (aij->donotstash) { 331 PetscFunctionReturn(0); 332 } 333 334 /* make sure all processors are either in INSERTMODE or ADDMODE */ 335 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 336 if (addv == (ADD_VALUES|INSERT_VALUES)) { 337 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 338 } 339 mat->insertmode = addv; /* in case this processor had no cache */ 340 341 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 342 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 343 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 344 PetscFunctionReturn(0); 345 } 346 347 348 #undef __FUNCT__ 349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 351 { 352 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 353 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 354 int i,j,rstart,ncols,n,ierr,flg; 355 int *row,*col,other_disassembled; 356 PetscScalar *val; 357 InsertMode addv = mat->insertmode; 358 359 PetscFunctionBegin; 360 if (!aij->donotstash) { 361 while (1) { 362 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 363 if (!flg) break; 364 365 for (i=0; i<n;) { 366 /* Now identify the consecutive vals belonging to the same row */ 367 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 368 if (j < n) ncols = j-i; 369 else ncols = n-i; 370 /* Now assemble all these values with a single function call */ 371 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 372 i = j; 373 } 374 } 375 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 376 } 377 378 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 379 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 380 381 /* determine if any processor has disassembled, if so we must 382 also disassemble ourselfs, in order that we may reassemble. */ 383 /* 384 if nonzero structure of submatrix B cannot change then we know that 385 no processor disassembled thus we can skip this stuff 386 */ 387 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 388 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 389 if (mat->was_assembled && !other_disassembled) { 390 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 391 } 392 } 393 394 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 395 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 396 } 397 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 398 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 399 400 if (aij->rowvalues) { 401 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 402 aij->rowvalues = 0; 403 } 404 405 /* used by MatAXPY() */ 406 a->xtoy = 0; b->xtoy = 0; 407 a->XtoY = 0; b->XtoY = 0; 408 409 PetscFunctionReturn(0); 410 } 411 412 #undef __FUNCT__ 413 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 414 int MatZeroEntries_MPIAIJ(Mat A) 415 { 416 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 417 int ierr; 418 419 PetscFunctionBegin; 420 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 421 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 422 PetscFunctionReturn(0); 423 } 424 425 #undef __FUNCT__ 426 #define __FUNCT__ "MatZeroRows_MPIAIJ" 427 int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 428 { 429 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 430 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 431 int *nprocs,j,idx,nsends,row; 432 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 433 int *rvalues,tag = A->tag,count,base,slen,n,*source; 434 int *lens,imdex,*lrows,*values,rstart=l->rstart; 435 MPI_Comm comm = A->comm; 436 MPI_Request *send_waits,*recv_waits; 437 MPI_Status recv_status,*send_status; 438 IS istmp; 439 PetscTruth found; 440 441 PetscFunctionBegin; 442 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 443 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 444 445 /* first count number of contributors to each processor */ 446 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 447 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 448 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 449 for (i=0; i<N; i++) { 450 idx = rows[i]; 451 found = PETSC_FALSE; 452 for (j=0; j<size; j++) { 453 if (idx >= owners[j] && idx < owners[j+1]) { 454 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 455 } 456 } 457 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 458 } 459 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 460 461 /* inform other processors of number of messages and max length*/ 462 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 463 464 /* post receives: */ 465 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 466 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 467 for (i=0; i<nrecvs; i++) { 468 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 469 } 470 471 /* do sends: 472 1) starts[i] gives the starting index in svalues for stuff going to 473 the ith processor 474 */ 475 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 476 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 477 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 478 starts[0] = 0; 479 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 480 for (i=0; i<N; i++) { 481 svalues[starts[owner[i]]++] = rows[i]; 482 } 483 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 484 485 starts[0] = 0; 486 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 487 count = 0; 488 for (i=0; i<size; i++) { 489 if (nprocs[2*i+1]) { 490 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 491 } 492 } 493 ierr = PetscFree(starts);CHKERRQ(ierr); 494 495 base = owners[rank]; 496 497 /* wait on receives */ 498 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 499 source = lens + nrecvs; 500 count = nrecvs; slen = 0; 501 while (count) { 502 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 503 /* unpack receives into our local space */ 504 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 505 source[imdex] = recv_status.MPI_SOURCE; 506 lens[imdex] = n; 507 slen += n; 508 count--; 509 } 510 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 511 512 /* move the data into the send scatter */ 513 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 514 count = 0; 515 for (i=0; i<nrecvs; i++) { 516 values = rvalues + i*nmax; 517 for (j=0; j<lens[i]; j++) { 518 lrows[count++] = values[j] - base; 519 } 520 } 521 ierr = PetscFree(rvalues);CHKERRQ(ierr); 522 ierr = PetscFree(lens);CHKERRQ(ierr); 523 ierr = PetscFree(owner);CHKERRQ(ierr); 524 ierr = PetscFree(nprocs);CHKERRQ(ierr); 525 526 /* actually zap the local rows */ 527 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 528 PetscLogObjectParent(A,istmp); 529 530 /* 531 Zero the required rows. If the "diagonal block" of the matrix 532 is square and the user wishes to set the diagonal we use seperate 533 code so that MatSetValues() is not called for each diagonal allocating 534 new memory, thus calling lots of mallocs and slowing things down. 535 536 Contributed by: Mathew Knepley 537 */ 538 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 539 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 540 if (diag && (l->A->M == l->A->N)) { 541 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 542 } else if (diag) { 543 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 544 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 545 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 546 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 547 } 548 for (i = 0; i < slen; i++) { 549 row = lrows[i] + rstart; 550 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 551 } 552 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 553 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 554 } else { 555 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 556 } 557 ierr = ISDestroy(istmp);CHKERRQ(ierr); 558 ierr = PetscFree(lrows);CHKERRQ(ierr); 559 560 /* wait on sends */ 561 if (nsends) { 562 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 563 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 564 ierr = PetscFree(send_status);CHKERRQ(ierr); 565 } 566 ierr = PetscFree(send_waits);CHKERRQ(ierr); 567 ierr = PetscFree(svalues);CHKERRQ(ierr); 568 569 PetscFunctionReturn(0); 570 } 571 572 #undef __FUNCT__ 573 #define __FUNCT__ "MatMult_MPIAIJ" 574 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 575 { 576 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 577 int ierr,nt; 578 579 PetscFunctionBegin; 580 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 581 if (nt != A->n) { 582 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 583 } 584 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 585 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 586 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 587 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 588 PetscFunctionReturn(0); 589 } 590 591 #undef __FUNCT__ 592 #define __FUNCT__ "MatMultAdd_MPIAIJ" 593 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 594 { 595 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 596 int ierr; 597 598 PetscFunctionBegin; 599 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 600 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 601 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 602 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 603 PetscFunctionReturn(0); 604 } 605 606 #undef __FUNCT__ 607 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 608 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 609 { 610 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 611 int ierr; 612 613 PetscFunctionBegin; 614 /* do nondiagonal part */ 615 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 616 /* send it on its way */ 617 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 618 /* do local part */ 619 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 620 /* receive remote parts: note this assumes the values are not actually */ 621 /* inserted in yy until the next line, which is true for my implementation*/ 622 /* but is not perhaps always true. */ 623 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 624 PetscFunctionReturn(0); 625 } 626 627 EXTERN_C_BEGIN 628 #undef __FUNCT__ 629 #define __FUNCT__ "MatIsSymmetric_MPIAIJ" 630 int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f) 631 { 632 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 633 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 634 IS Me,Notme; 635 int M,N,first,last,*notme,i, ierr; 636 637 PetscFunctionBegin; 638 639 /* Easy test: symmetric diagonal block */ 640 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 641 ierr = MatIsSymmetric(Adia,Bdia,f); CHKERRQ(ierr); 642 if (!*f) PetscFunctionReturn(0); 643 644 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 645 ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr); 646 ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr); 647 ierr = PetscMalloc((N-last+first)*sizeof(int),¬me); CHKERRQ(ierr); 648 for (i=0; i<first; i++) notme[i] = i; 649 for (i=last; i<M; i++) notme[i-last+first] = i; 650 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr); 651 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr); 652 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs); CHKERRQ(ierr); 653 Aoff = Aoffs[0]; 654 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs); CHKERRQ(ierr); 655 Boff = Boffs[0]; 656 ierr = MatIsSymmetric(Aoff,Boff,f); CHKERRQ(ierr); 657 ierr = MatDestroyMatrices(1,&Aoffs); CHKERRQ(ierr); 658 ierr = MatDestroyMatrices(1,&Boffs); CHKERRQ(ierr); 659 ierr = ISDestroy(Me); CHKERRQ(ierr); 660 ierr = ISDestroy(Notme); CHKERRQ(ierr); 661 662 PetscFunctionReturn(0); 663 } 664 EXTERN_C_END 665 666 #undef __FUNCT__ 667 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 668 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 669 { 670 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 671 int ierr; 672 673 PetscFunctionBegin; 674 /* do nondiagonal part */ 675 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 676 /* send it on its way */ 677 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 678 /* do local part */ 679 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 680 /* receive remote parts: note this assumes the values are not actually */ 681 /* inserted in yy until the next line, which is true for my implementation*/ 682 /* but is not perhaps always true. */ 683 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 684 PetscFunctionReturn(0); 685 } 686 687 /* 688 This only works correctly for square matrices where the subblock A->A is the 689 diagonal block 690 */ 691 #undef __FUNCT__ 692 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 693 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 694 { 695 int ierr; 696 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 697 698 PetscFunctionBegin; 699 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 700 if (a->rstart != a->cstart || a->rend != a->cend) { 701 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 702 } 703 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 704 PetscFunctionReturn(0); 705 } 706 707 #undef __FUNCT__ 708 #define __FUNCT__ "MatScale_MPIAIJ" 709 int MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 710 { 711 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 712 int ierr; 713 714 PetscFunctionBegin; 715 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 716 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 717 PetscFunctionReturn(0); 718 } 719 720 #undef __FUNCT__ 721 #define __FUNCT__ "MatDestroy_MPIAIJ" 722 int MatDestroy_MPIAIJ(Mat mat) 723 { 724 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 725 int ierr; 726 727 PetscFunctionBegin; 728 #if defined(PETSC_USE_LOG) 729 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 730 #endif 731 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 732 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 733 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 734 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 735 #if defined (PETSC_USE_CTABLE) 736 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 737 #else 738 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 739 #endif 740 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 741 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 742 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 743 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 744 ierr = PetscFree(aij);CHKERRQ(ierr); 745 PetscFunctionReturn(0); 746 } 747 748 #undef __FUNCT__ 749 #define __FUNCT__ "MatView_MPIAIJ_Binary" 750 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 751 { 752 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 753 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 754 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 755 int nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag; 756 int nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 757 PetscScalar *column_values; 758 759 PetscFunctionBegin; 760 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 761 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 762 nz = A->nz + B->nz; 763 if (rank == 0) { 764 header[0] = MAT_FILE_COOKIE; 765 header[1] = mat->M; 766 header[2] = mat->N; 767 ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 768 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 769 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr); 770 /* get largest number of rows any processor has */ 771 rlen = mat->m; 772 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 773 for (i=1; i<size; i++) { 774 rlen = PetscMax(rlen,range[i+1] - range[i]); 775 } 776 } else { 777 ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 778 rlen = mat->m; 779 } 780 781 /* load up the local row counts */ 782 ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr); 783 for (i=0; i<mat->m; i++) { 784 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 785 } 786 787 /* store the row lengths to the file */ 788 if (rank == 0) { 789 MPI_Status status; 790 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr); 791 for (i=1; i<size; i++) { 792 rlen = range[i+1] - range[i]; 793 ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 794 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr); 795 } 796 } else { 797 ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 798 } 799 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 800 801 /* load up the local column indices */ 802 nzmax = nz; /* )th processor needs space a largest processor needs */ 803 ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 804 ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr); 805 cnt = 0; 806 for (i=0; i<mat->m; i++) { 807 for (j=B->i[i]; j<B->i[i+1]; j++) { 808 if ( (col = garray[B->j[j]]) > cstart) break; 809 column_indices[cnt++] = col; 810 } 811 for (k=A->i[i]; k<A->i[i+1]; k++) { 812 column_indices[cnt++] = A->j[k] + cstart; 813 } 814 for (; j<B->i[i+1]; j++) { 815 column_indices[cnt++] = garray[B->j[j]]; 816 } 817 } 818 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 819 820 /* store the column indices to the file */ 821 if (rank == 0) { 822 MPI_Status status; 823 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr); 824 for (i=1; i<size; i++) { 825 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 826 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 827 ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 828 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr); 829 } 830 } else { 831 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 832 ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 833 } 834 ierr = PetscFree(column_indices);CHKERRQ(ierr); 835 836 /* load up the local column values */ 837 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 838 cnt = 0; 839 for (i=0; i<mat->m; i++) { 840 for (j=B->i[i]; j<B->i[i+1]; j++) { 841 if ( garray[B->j[j]] > cstart) break; 842 column_values[cnt++] = B->a[j]; 843 } 844 for (k=A->i[i]; k<A->i[i+1]; k++) { 845 column_values[cnt++] = A->a[k]; 846 } 847 for (; j<B->i[i+1]; j++) { 848 column_values[cnt++] = B->a[j]; 849 } 850 } 851 if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 852 853 /* store the column values to the file */ 854 if (rank == 0) { 855 MPI_Status status; 856 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr); 857 for (i=1; i<size; i++) { 858 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 859 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 860 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 861 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr); 862 } 863 } else { 864 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 865 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 866 } 867 ierr = PetscFree(column_values);CHKERRQ(ierr); 868 PetscFunctionReturn(0); 869 } 870 871 #undef __FUNCT__ 872 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 873 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 874 { 875 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 876 int ierr,rank = aij->rank,size = aij->size; 877 PetscTruth isdraw,isascii,flg,isbinary; 878 PetscViewer sviewer; 879 PetscViewerFormat format; 880 881 PetscFunctionBegin; 882 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 883 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 884 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 885 if (isascii) { 886 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 887 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 888 MatInfo info; 889 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 890 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 891 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 892 if (flg) { 893 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 894 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 895 } else { 896 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 897 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 898 } 899 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 900 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 901 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 902 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 903 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 904 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 905 PetscFunctionReturn(0); 906 } else if (format == PETSC_VIEWER_ASCII_INFO) { 907 PetscFunctionReturn(0); 908 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 909 PetscFunctionReturn(0); 910 } 911 } else if (isbinary) { 912 if (size == 1) { 913 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 914 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 915 } else { 916 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 917 } 918 PetscFunctionReturn(0); 919 } else if (isdraw) { 920 PetscDraw draw; 921 PetscTruth isnull; 922 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 923 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 924 } 925 926 if (size == 1) { 927 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 928 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 929 } else { 930 /* assemble the entire matrix onto first processor. */ 931 Mat A; 932 Mat_SeqAIJ *Aloc; 933 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 934 PetscScalar *a; 935 936 if (!rank) { 937 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 938 } else { 939 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 940 } 941 PetscLogObjectParent(mat,A); 942 943 /* copy over the A part */ 944 Aloc = (Mat_SeqAIJ*)aij->A->data; 945 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 946 row = aij->rstart; 947 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 948 for (i=0; i<m; i++) { 949 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 950 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 951 } 952 aj = Aloc->j; 953 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 954 955 /* copy over the B part */ 956 Aloc = (Mat_SeqAIJ*)aij->B->data; 957 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 958 row = aij->rstart; 959 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 960 ct = cols; 961 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 962 for (i=0; i<m; i++) { 963 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 964 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 965 } 966 ierr = PetscFree(ct);CHKERRQ(ierr); 967 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 968 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 969 /* 970 Everyone has to call to draw the matrix since the graphics waits are 971 synchronized across all processors that share the PetscDraw object 972 */ 973 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 974 if (!rank) { 975 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 976 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 977 } 978 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 979 ierr = MatDestroy(A);CHKERRQ(ierr); 980 } 981 PetscFunctionReturn(0); 982 } 983 984 #undef __FUNCT__ 985 #define __FUNCT__ "MatView_MPIAIJ" 986 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 987 { 988 int ierr; 989 PetscTruth isascii,isdraw,issocket,isbinary; 990 991 PetscFunctionBegin; 992 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 993 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 994 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 995 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 996 if (isascii || isdraw || isbinary || issocket) { 997 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 998 } else { 999 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1000 } 1001 PetscFunctionReturn(0); 1002 } 1003 1004 1005 1006 #undef __FUNCT__ 1007 #define __FUNCT__ "MatRelax_MPIAIJ" 1008 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1009 { 1010 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1011 int ierr; 1012 Vec bb1; 1013 PetscScalar mone=-1.0; 1014 1015 PetscFunctionBegin; 1016 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1017 1018 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1019 1020 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1021 if (flag & SOR_ZERO_INITIAL_GUESS) { 1022 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1023 its--; 1024 } 1025 1026 while (its--) { 1027 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1028 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1029 1030 /* update rhs: bb1 = bb - B*x */ 1031 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1032 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1033 1034 /* local sweep */ 1035 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1036 CHKERRQ(ierr); 1037 } 1038 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1039 if (flag & SOR_ZERO_INITIAL_GUESS) { 1040 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1041 its--; 1042 } 1043 while (its--) { 1044 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1045 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1046 1047 /* update rhs: bb1 = bb - B*x */ 1048 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1049 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1050 1051 /* local sweep */ 1052 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1053 CHKERRQ(ierr); 1054 } 1055 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1056 if (flag & SOR_ZERO_INITIAL_GUESS) { 1057 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1058 its--; 1059 } 1060 while (its--) { 1061 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1062 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1063 1064 /* update rhs: bb1 = bb - B*x */ 1065 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1066 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1067 1068 /* local sweep */ 1069 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1070 CHKERRQ(ierr); 1071 } 1072 } else { 1073 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1074 } 1075 1076 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1077 PetscFunctionReturn(0); 1078 } 1079 1080 #undef __FUNCT__ 1081 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1082 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1083 { 1084 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1085 Mat A = mat->A,B = mat->B; 1086 int ierr; 1087 PetscReal isend[5],irecv[5]; 1088 1089 PetscFunctionBegin; 1090 info->block_size = 1.0; 1091 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1092 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1093 isend[3] = info->memory; isend[4] = info->mallocs; 1094 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1095 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1096 isend[3] += info->memory; isend[4] += info->mallocs; 1097 if (flag == MAT_LOCAL) { 1098 info->nz_used = isend[0]; 1099 info->nz_allocated = isend[1]; 1100 info->nz_unneeded = isend[2]; 1101 info->memory = isend[3]; 1102 info->mallocs = isend[4]; 1103 } else if (flag == MAT_GLOBAL_MAX) { 1104 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1105 info->nz_used = irecv[0]; 1106 info->nz_allocated = irecv[1]; 1107 info->nz_unneeded = irecv[2]; 1108 info->memory = irecv[3]; 1109 info->mallocs = irecv[4]; 1110 } else if (flag == MAT_GLOBAL_SUM) { 1111 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1112 info->nz_used = irecv[0]; 1113 info->nz_allocated = irecv[1]; 1114 info->nz_unneeded = irecv[2]; 1115 info->memory = irecv[3]; 1116 info->mallocs = irecv[4]; 1117 } 1118 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1119 info->fill_ratio_needed = 0; 1120 info->factor_mallocs = 0; 1121 info->rows_global = (double)matin->M; 1122 info->columns_global = (double)matin->N; 1123 info->rows_local = (double)matin->m; 1124 info->columns_local = (double)matin->N; 1125 1126 PetscFunctionReturn(0); 1127 } 1128 1129 #undef __FUNCT__ 1130 #define __FUNCT__ "MatSetOption_MPIAIJ" 1131 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1132 { 1133 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1134 int ierr; 1135 1136 PetscFunctionBegin; 1137 switch (op) { 1138 case MAT_NO_NEW_NONZERO_LOCATIONS: 1139 case MAT_YES_NEW_NONZERO_LOCATIONS: 1140 case MAT_COLUMNS_UNSORTED: 1141 case MAT_COLUMNS_SORTED: 1142 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1143 case MAT_KEEP_ZEROED_ROWS: 1144 case MAT_NEW_NONZERO_LOCATION_ERR: 1145 case MAT_USE_INODES: 1146 case MAT_DO_NOT_USE_INODES: 1147 case MAT_IGNORE_ZERO_ENTRIES: 1148 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1149 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1150 break; 1151 case MAT_ROW_ORIENTED: 1152 a->roworiented = PETSC_TRUE; 1153 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1154 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1155 break; 1156 case MAT_ROWS_SORTED: 1157 case MAT_ROWS_UNSORTED: 1158 case MAT_YES_NEW_DIAGONALS: 1159 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1160 break; 1161 case MAT_COLUMN_ORIENTED: 1162 a->roworiented = PETSC_FALSE; 1163 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1164 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1165 break; 1166 case MAT_IGNORE_OFF_PROC_ENTRIES: 1167 a->donotstash = PETSC_TRUE; 1168 break; 1169 case MAT_NO_NEW_DIAGONALS: 1170 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1171 default: 1172 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1173 } 1174 PetscFunctionReturn(0); 1175 } 1176 1177 #undef __FUNCT__ 1178 #define __FUNCT__ "MatGetRow_MPIAIJ" 1179 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1180 { 1181 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1182 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1183 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1184 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1185 int *cmap,*idx_p; 1186 1187 PetscFunctionBegin; 1188 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1189 mat->getrowactive = PETSC_TRUE; 1190 1191 if (!mat->rowvalues && (idx || v)) { 1192 /* 1193 allocate enough space to hold information from the longest row. 1194 */ 1195 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1196 int max = 1,tmp; 1197 for (i=0; i<matin->m; i++) { 1198 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1199 if (max < tmp) { max = tmp; } 1200 } 1201 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1202 mat->rowindices = (int*)(mat->rowvalues + max); 1203 } 1204 1205 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1206 lrow = row - rstart; 1207 1208 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1209 if (!v) {pvA = 0; pvB = 0;} 1210 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1211 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1212 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1213 nztot = nzA + nzB; 1214 1215 cmap = mat->garray; 1216 if (v || idx) { 1217 if (nztot) { 1218 /* Sort by increasing column numbers, assuming A and B already sorted */ 1219 int imark = -1; 1220 if (v) { 1221 *v = v_p = mat->rowvalues; 1222 for (i=0; i<nzB; i++) { 1223 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1224 else break; 1225 } 1226 imark = i; 1227 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1228 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1229 } 1230 if (idx) { 1231 *idx = idx_p = mat->rowindices; 1232 if (imark > -1) { 1233 for (i=0; i<imark; i++) { 1234 idx_p[i] = cmap[cworkB[i]]; 1235 } 1236 } else { 1237 for (i=0; i<nzB; i++) { 1238 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1239 else break; 1240 } 1241 imark = i; 1242 } 1243 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1244 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1245 } 1246 } else { 1247 if (idx) *idx = 0; 1248 if (v) *v = 0; 1249 } 1250 } 1251 *nz = nztot; 1252 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1253 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1254 PetscFunctionReturn(0); 1255 } 1256 1257 #undef __FUNCT__ 1258 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1259 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1260 { 1261 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1262 1263 PetscFunctionBegin; 1264 if (aij->getrowactive == PETSC_FALSE) { 1265 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1266 } 1267 aij->getrowactive = PETSC_FALSE; 1268 PetscFunctionReturn(0); 1269 } 1270 1271 #undef __FUNCT__ 1272 #define __FUNCT__ "MatNorm_MPIAIJ" 1273 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1274 { 1275 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1276 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1277 int ierr,i,j,cstart = aij->cstart; 1278 PetscReal sum = 0.0; 1279 PetscScalar *v; 1280 1281 PetscFunctionBegin; 1282 if (aij->size == 1) { 1283 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1284 } else { 1285 if (type == NORM_FROBENIUS) { 1286 v = amat->a; 1287 for (i=0; i<amat->nz; i++) { 1288 #if defined(PETSC_USE_COMPLEX) 1289 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1290 #else 1291 sum += (*v)*(*v); v++; 1292 #endif 1293 } 1294 v = bmat->a; 1295 for (i=0; i<bmat->nz; i++) { 1296 #if defined(PETSC_USE_COMPLEX) 1297 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1298 #else 1299 sum += (*v)*(*v); v++; 1300 #endif 1301 } 1302 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1303 *norm = sqrt(*norm); 1304 } else if (type == NORM_1) { /* max column norm */ 1305 PetscReal *tmp,*tmp2; 1306 int *jj,*garray = aij->garray; 1307 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1308 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1309 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1310 *norm = 0.0; 1311 v = amat->a; jj = amat->j; 1312 for (j=0; j<amat->nz; j++) { 1313 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1314 } 1315 v = bmat->a; jj = bmat->j; 1316 for (j=0; j<bmat->nz; j++) { 1317 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1318 } 1319 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1320 for (j=0; j<mat->N; j++) { 1321 if (tmp2[j] > *norm) *norm = tmp2[j]; 1322 } 1323 ierr = PetscFree(tmp);CHKERRQ(ierr); 1324 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1325 } else if (type == NORM_INFINITY) { /* max row norm */ 1326 PetscReal ntemp = 0.0; 1327 for (j=0; j<aij->A->m; j++) { 1328 v = amat->a + amat->i[j]; 1329 sum = 0.0; 1330 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1331 sum += PetscAbsScalar(*v); v++; 1332 } 1333 v = bmat->a + bmat->i[j]; 1334 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1335 sum += PetscAbsScalar(*v); v++; 1336 } 1337 if (sum > ntemp) ntemp = sum; 1338 } 1339 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1340 } else { 1341 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1342 } 1343 } 1344 PetscFunctionReturn(0); 1345 } 1346 1347 #undef __FUNCT__ 1348 #define __FUNCT__ "MatTranspose_MPIAIJ" 1349 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1350 { 1351 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1352 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1353 int ierr; 1354 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1355 Mat B; 1356 PetscScalar *array; 1357 1358 PetscFunctionBegin; 1359 if (!matout && M != N) { 1360 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1361 } 1362 1363 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1364 1365 /* copy over the A part */ 1366 Aloc = (Mat_SeqAIJ*)a->A->data; 1367 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1368 row = a->rstart; 1369 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1370 for (i=0; i<m; i++) { 1371 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1372 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1373 } 1374 aj = Aloc->j; 1375 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1376 1377 /* copy over the B part */ 1378 Aloc = (Mat_SeqAIJ*)a->B->data; 1379 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1380 row = a->rstart; 1381 ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr); 1382 ct = cols; 1383 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1384 for (i=0; i<m; i++) { 1385 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1386 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1387 } 1388 ierr = PetscFree(ct);CHKERRQ(ierr); 1389 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1390 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1391 if (matout) { 1392 *matout = B; 1393 } else { 1394 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1395 } 1396 PetscFunctionReturn(0); 1397 } 1398 1399 #undef __FUNCT__ 1400 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1401 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1402 { 1403 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1404 Mat a = aij->A,b = aij->B; 1405 int ierr,s1,s2,s3; 1406 1407 PetscFunctionBegin; 1408 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1409 if (rr) { 1410 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1411 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1412 /* Overlap communication with computation. */ 1413 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1414 } 1415 if (ll) { 1416 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1417 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1418 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1419 } 1420 /* scale the diagonal block */ 1421 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1422 1423 if (rr) { 1424 /* Do a scatter end and then right scale the off-diagonal block */ 1425 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1426 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1427 } 1428 1429 PetscFunctionReturn(0); 1430 } 1431 1432 1433 #undef __FUNCT__ 1434 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1435 int MatPrintHelp_MPIAIJ(Mat A) 1436 { 1437 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1438 int ierr; 1439 1440 PetscFunctionBegin; 1441 if (!a->rank) { 1442 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1443 } 1444 PetscFunctionReturn(0); 1445 } 1446 1447 #undef __FUNCT__ 1448 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1449 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1450 { 1451 PetscFunctionBegin; 1452 *bs = 1; 1453 PetscFunctionReturn(0); 1454 } 1455 #undef __FUNCT__ 1456 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1457 int MatSetUnfactored_MPIAIJ(Mat A) 1458 { 1459 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1460 int ierr; 1461 1462 PetscFunctionBegin; 1463 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1464 PetscFunctionReturn(0); 1465 } 1466 1467 #undef __FUNCT__ 1468 #define __FUNCT__ "MatEqual_MPIAIJ" 1469 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1470 { 1471 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1472 Mat a,b,c,d; 1473 PetscTruth flg; 1474 int ierr; 1475 1476 PetscFunctionBegin; 1477 a = matA->A; b = matA->B; 1478 c = matB->A; d = matB->B; 1479 1480 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1481 if (flg == PETSC_TRUE) { 1482 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1483 } 1484 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1485 PetscFunctionReturn(0); 1486 } 1487 1488 #undef __FUNCT__ 1489 #define __FUNCT__ "MatCopy_MPIAIJ" 1490 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1491 { 1492 int ierr; 1493 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1494 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1495 1496 PetscFunctionBegin; 1497 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1498 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1499 /* because of the column compression in the off-processor part of the matrix a->B, 1500 the number of columns in a->B and b->B may be different, hence we cannot call 1501 the MatCopy() directly on the two parts. If need be, we can provide a more 1502 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1503 then copying the submatrices */ 1504 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1505 } else { 1506 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1507 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1508 } 1509 PetscFunctionReturn(0); 1510 } 1511 1512 #undef __FUNCT__ 1513 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1514 int MatSetUpPreallocation_MPIAIJ(Mat A) 1515 { 1516 int ierr; 1517 1518 PetscFunctionBegin; 1519 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1520 PetscFunctionReturn(0); 1521 } 1522 1523 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1524 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS [],int); 1525 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1526 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,const IS[],const IS[],MatReuse,Mat *[]); 1527 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1528 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1529 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*); 1530 #endif 1531 1532 #include "petscblaslapack.h" 1533 extern int MatAXPY_SeqAIJ(const PetscScalar[],Mat,Mat,MatStructure); 1534 #undef __FUNCT__ 1535 #define __FUNCT__ "MatAXPY_MPIAIJ" 1536 int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1537 { 1538 int ierr,one=1,i; 1539 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1540 Mat_SeqAIJ *x,*y; 1541 1542 PetscFunctionBegin; 1543 if (str == SAME_NONZERO_PATTERN) { 1544 x = (Mat_SeqAIJ *)xx->A->data; 1545 y = (Mat_SeqAIJ *)yy->A->data; 1546 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1547 x = (Mat_SeqAIJ *)xx->B->data; 1548 y = (Mat_SeqAIJ *)yy->B->data; 1549 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1550 } else if (str == SUBSET_NONZERO_PATTERN) { 1551 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1552 1553 x = (Mat_SeqAIJ *)xx->B->data; 1554 y = (Mat_SeqAIJ *)yy->B->data; 1555 if (y->xtoy && y->XtoY != xx->B) { 1556 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1557 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1558 } 1559 if (!y->xtoy) { /* get xtoy */ 1560 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1561 y->XtoY = xx->B; 1562 } 1563 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1564 } else { 1565 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1566 } 1567 PetscFunctionReturn(0); 1568 } 1569 1570 /* -------------------------------------------------------------------*/ 1571 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1572 MatGetRow_MPIAIJ, 1573 MatRestoreRow_MPIAIJ, 1574 MatMult_MPIAIJ, 1575 MatMultAdd_MPIAIJ, 1576 MatMultTranspose_MPIAIJ, 1577 MatMultTransposeAdd_MPIAIJ, 1578 0, 1579 0, 1580 0, 1581 0, 1582 0, 1583 0, 1584 MatRelax_MPIAIJ, 1585 MatTranspose_MPIAIJ, 1586 MatGetInfo_MPIAIJ, 1587 MatEqual_MPIAIJ, 1588 MatGetDiagonal_MPIAIJ, 1589 MatDiagonalScale_MPIAIJ, 1590 MatNorm_MPIAIJ, 1591 MatAssemblyBegin_MPIAIJ, 1592 MatAssemblyEnd_MPIAIJ, 1593 0, 1594 MatSetOption_MPIAIJ, 1595 MatZeroEntries_MPIAIJ, 1596 MatZeroRows_MPIAIJ, 1597 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1598 MatLUFactorSymbolic_MPIAIJ_TFS, 1599 #else 1600 0, 1601 #endif 1602 0, 1603 0, 1604 0, 1605 MatSetUpPreallocation_MPIAIJ, 1606 0, 1607 0, 1608 0, 1609 0, 1610 MatDuplicate_MPIAIJ, 1611 0, 1612 0, 1613 0, 1614 0, 1615 MatAXPY_MPIAIJ, 1616 MatGetSubMatrices_MPIAIJ, 1617 MatIncreaseOverlap_MPIAIJ, 1618 MatGetValues_MPIAIJ, 1619 MatCopy_MPIAIJ, 1620 MatPrintHelp_MPIAIJ, 1621 MatScale_MPIAIJ, 1622 0, 1623 0, 1624 0, 1625 MatGetBlockSize_MPIAIJ, 1626 0, 1627 0, 1628 0, 1629 0, 1630 MatFDColoringCreate_MPIAIJ, 1631 0, 1632 MatSetUnfactored_MPIAIJ, 1633 0, 1634 0, 1635 MatGetSubMatrix_MPIAIJ, 1636 MatDestroy_MPIAIJ, 1637 MatView_MPIAIJ, 1638 MatGetPetscMaps_Petsc, 1639 0, 1640 0, 1641 0, 1642 0, 1643 0, 1644 0, 1645 0, 1646 0, 1647 MatSetColoring_MPIAIJ, 1648 MatSetValuesAdic_MPIAIJ, 1649 MatSetValuesAdifor_MPIAIJ 1650 }; 1651 1652 /* ----------------------------------------------------------------------------------------*/ 1653 1654 EXTERN_C_BEGIN 1655 #undef __FUNCT__ 1656 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1657 int MatStoreValues_MPIAIJ(Mat mat) 1658 { 1659 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1660 int ierr; 1661 1662 PetscFunctionBegin; 1663 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1664 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1665 PetscFunctionReturn(0); 1666 } 1667 EXTERN_C_END 1668 1669 EXTERN_C_BEGIN 1670 #undef __FUNCT__ 1671 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1672 int MatRetrieveValues_MPIAIJ(Mat mat) 1673 { 1674 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1675 int ierr; 1676 1677 PetscFunctionBegin; 1678 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1679 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1680 PetscFunctionReturn(0); 1681 } 1682 EXTERN_C_END 1683 1684 #include "petscpc.h" 1685 EXTERN_C_BEGIN 1686 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1687 EXTERN int MatDiagonalScaleLocal_MPIAIJ(Mat,Vec); 1688 EXTERN_C_END 1689 1690 EXTERN_C_BEGIN 1691 #undef __FUNCT__ 1692 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1693 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 1694 { 1695 Mat_MPIAIJ *b; 1696 int ierr,i; 1697 1698 PetscFunctionBegin; 1699 B->preallocated = PETSC_TRUE; 1700 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1701 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1702 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1703 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1704 if (d_nnz) { 1705 for (i=0; i<B->m; i++) { 1706 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 1707 } 1708 } 1709 if (o_nnz) { 1710 for (i=0; i<B->m; i++) { 1711 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 1712 } 1713 } 1714 b = (Mat_MPIAIJ*)B->data; 1715 1716 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1717 PetscLogObjectParent(B,b->A); 1718 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1719 PetscLogObjectParent(B,b->B); 1720 1721 PetscFunctionReturn(0); 1722 } 1723 EXTERN_C_END 1724 1725 /*MC 1726 MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 1727 1728 Options Database Keys: 1729 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 1730 1731 Level: beginner 1732 1733 .seealso: MatCreateMPIAIJ 1734 M*/ 1735 1736 EXTERN_C_BEGIN 1737 #undef __FUNCT__ 1738 #define __FUNCT__ "MatCreate_MPIAIJ" 1739 int MatCreate_MPIAIJ(Mat B) 1740 { 1741 Mat_MPIAIJ *b; 1742 int ierr,i,size; 1743 1744 PetscFunctionBegin; 1745 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1746 1747 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1748 B->data = (void*)b; 1749 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1750 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1751 B->factor = 0; 1752 B->assembled = PETSC_FALSE; 1753 B->mapping = 0; 1754 1755 B->insertmode = NOT_SET_VALUES; 1756 b->size = size; 1757 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1758 1759 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1760 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1761 1762 /* the information in the maps duplicates the information computed below, eventually 1763 we should remove the duplicate information that is not contained in the maps */ 1764 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1765 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1766 1767 /* build local table of row and column ownerships */ 1768 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1769 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1770 b->cowners = b->rowners + b->size + 2; 1771 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1772 b->rowners[0] = 0; 1773 for (i=2; i<=b->size; i++) { 1774 b->rowners[i] += b->rowners[i-1]; 1775 } 1776 b->rstart = b->rowners[b->rank]; 1777 b->rend = b->rowners[b->rank+1]; 1778 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1779 b->cowners[0] = 0; 1780 for (i=2; i<=b->size; i++) { 1781 b->cowners[i] += b->cowners[i-1]; 1782 } 1783 b->cstart = b->cowners[b->rank]; 1784 b->cend = b->cowners[b->rank+1]; 1785 1786 /* build cache for off array entries formed */ 1787 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1788 b->donotstash = PETSC_FALSE; 1789 b->colmap = 0; 1790 b->garray = 0; 1791 b->roworiented = PETSC_TRUE; 1792 1793 /* stuff used for matrix vector multiply */ 1794 b->lvec = PETSC_NULL; 1795 b->Mvctx = PETSC_NULL; 1796 1797 /* stuff for MatGetRow() */ 1798 b->rowindices = 0; 1799 b->rowvalues = 0; 1800 b->getrowactive = PETSC_FALSE; 1801 1802 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1803 "MatStoreValues_MPIAIJ", 1804 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1805 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1806 "MatRetrieveValues_MPIAIJ", 1807 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1808 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1809 "MatGetDiagonalBlock_MPIAIJ", 1810 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1811 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C", 1812 "MatIsSymmetric_MPIAIJ", 1813 MatIsSymmetric_MPIAIJ); CHKERRQ(ierr); 1814 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 1815 "MatMPIAIJSetPreallocation_MPIAIJ", 1816 MatMPIAIJSetPreallocation_MPIAIJ); CHKERRQ(ierr); 1817 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 1818 "MatDiagonalScaleLocal_MPIAIJ", 1819 MatDiagonalScaleLocal_MPIAIJ); CHKERRQ(ierr); 1820 PetscFunctionReturn(0); 1821 } 1822 EXTERN_C_END 1823 1824 #undef __FUNCT__ 1825 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1826 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1827 { 1828 Mat mat; 1829 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1830 int ierr; 1831 1832 PetscFunctionBegin; 1833 *newmat = 0; 1834 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1835 ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1836 a = (Mat_MPIAIJ*)mat->data; 1837 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1838 mat->factor = matin->factor; 1839 mat->assembled = PETSC_TRUE; 1840 mat->insertmode = NOT_SET_VALUES; 1841 mat->preallocated = PETSC_TRUE; 1842 1843 a->rstart = oldmat->rstart; 1844 a->rend = oldmat->rend; 1845 a->cstart = oldmat->cstart; 1846 a->cend = oldmat->cend; 1847 a->size = oldmat->size; 1848 a->rank = oldmat->rank; 1849 a->donotstash = oldmat->donotstash; 1850 a->roworiented = oldmat->roworiented; 1851 a->rowindices = 0; 1852 a->rowvalues = 0; 1853 a->getrowactive = PETSC_FALSE; 1854 1855 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1856 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1857 if (oldmat->colmap) { 1858 #if defined (PETSC_USE_CTABLE) 1859 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1860 #else 1861 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1862 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1863 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1864 #endif 1865 } else a->colmap = 0; 1866 if (oldmat->garray) { 1867 int len; 1868 len = oldmat->B->n; 1869 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1870 PetscLogObjectMemory(mat,len*sizeof(int)); 1871 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1872 } else a->garray = 0; 1873 1874 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1875 PetscLogObjectParent(mat,a->lvec); 1876 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1877 PetscLogObjectParent(mat,a->Mvctx); 1878 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1879 PetscLogObjectParent(mat,a->A); 1880 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1881 PetscLogObjectParent(mat,a->B); 1882 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1883 *newmat = mat; 1884 PetscFunctionReturn(0); 1885 } 1886 1887 #include "petscsys.h" 1888 1889 EXTERN_C_BEGIN 1890 #undef __FUNCT__ 1891 #define __FUNCT__ "MatLoad_MPIAIJ" 1892 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat) 1893 { 1894 Mat A; 1895 PetscScalar *vals,*svals; 1896 MPI_Comm comm = ((PetscObject)viewer)->comm; 1897 MPI_Status status; 1898 int i,nz,ierr,j,rstart,rend,fd; 1899 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1900 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1901 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1902 1903 PetscFunctionBegin; 1904 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1905 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1906 if (!rank) { 1907 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1908 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1909 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1910 if (header[3] < 0) { 1911 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1912 } 1913 } 1914 1915 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1916 M = header[1]; N = header[2]; 1917 /* determine ownership of all rows */ 1918 m = M/size + ((M % size) > rank); 1919 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1920 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1921 rowners[0] = 0; 1922 for (i=2; i<=size; i++) { 1923 rowners[i] += rowners[i-1]; 1924 } 1925 rstart = rowners[rank]; 1926 rend = rowners[rank+1]; 1927 1928 /* distribute row lengths to all processors */ 1929 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1930 offlens = ourlens + (rend-rstart); 1931 if (!rank) { 1932 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1933 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1934 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1935 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1936 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1937 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1938 } else { 1939 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1940 } 1941 1942 if (!rank) { 1943 /* calculate the number of nonzeros on each processor */ 1944 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1945 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1946 for (i=0; i<size; i++) { 1947 for (j=rowners[i]; j< rowners[i+1]; j++) { 1948 procsnz[i] += rowlengths[j]; 1949 } 1950 } 1951 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1952 1953 /* determine max buffer needed and allocate it */ 1954 maxnz = 0; 1955 for (i=0; i<size; i++) { 1956 maxnz = PetscMax(maxnz,procsnz[i]); 1957 } 1958 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1959 1960 /* read in my part of the matrix column indices */ 1961 nz = procsnz[0]; 1962 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1963 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1964 1965 /* read in every one elses and ship off */ 1966 for (i=1; i<size; i++) { 1967 nz = procsnz[i]; 1968 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1969 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1970 } 1971 ierr = PetscFree(cols);CHKERRQ(ierr); 1972 } else { 1973 /* determine buffer space needed for message */ 1974 nz = 0; 1975 for (i=0; i<m; i++) { 1976 nz += ourlens[i]; 1977 } 1978 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 1979 1980 /* receive message of column indices*/ 1981 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1982 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1983 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1984 } 1985 1986 /* determine column ownership if matrix is not square */ 1987 if (N != M) { 1988 n = N/size + ((N % size) > rank); 1989 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1990 cstart = cend - n; 1991 } else { 1992 cstart = rstart; 1993 cend = rend; 1994 n = cend - cstart; 1995 } 1996 1997 /* loop over local rows, determining number of off diagonal entries */ 1998 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 1999 jj = 0; 2000 for (i=0; i<m; i++) { 2001 for (j=0; j<ourlens[i]; j++) { 2002 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2003 jj++; 2004 } 2005 } 2006 2007 /* create our matrix */ 2008 for (i=0; i<m; i++) { 2009 ourlens[i] -= offlens[i]; 2010 } 2011 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2012 ierr = MatSetType(A,type);CHKERRQ(ierr); 2013 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2014 2015 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2016 for (i=0; i<m; i++) { 2017 ourlens[i] += offlens[i]; 2018 } 2019 2020 if (!rank) { 2021 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2022 2023 /* read in my part of the matrix numerical values */ 2024 nz = procsnz[0]; 2025 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2026 2027 /* insert into matrix */ 2028 jj = rstart; 2029 smycols = mycols; 2030 svals = vals; 2031 for (i=0; i<m; i++) { 2032 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2033 smycols += ourlens[i]; 2034 svals += ourlens[i]; 2035 jj++; 2036 } 2037 2038 /* read in other processors and ship out */ 2039 for (i=1; i<size; i++) { 2040 nz = procsnz[i]; 2041 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2042 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2043 } 2044 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2045 } else { 2046 /* receive numeric values */ 2047 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2048 2049 /* receive message of values*/ 2050 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2051 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2052 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2053 2054 /* insert into matrix */ 2055 jj = rstart; 2056 smycols = mycols; 2057 svals = vals; 2058 for (i=0; i<m; i++) { 2059 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2060 smycols += ourlens[i]; 2061 svals += ourlens[i]; 2062 jj++; 2063 } 2064 } 2065 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2066 ierr = PetscFree(vals);CHKERRQ(ierr); 2067 ierr = PetscFree(mycols);CHKERRQ(ierr); 2068 ierr = PetscFree(rowners);CHKERRQ(ierr); 2069 2070 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2071 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2072 *newmat = A; 2073 PetscFunctionReturn(0); 2074 } 2075 EXTERN_C_END 2076 2077 #undef __FUNCT__ 2078 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2079 /* 2080 Not great since it makes two copies of the submatrix, first an SeqAIJ 2081 in local and then by concatenating the local matrices the end result. 2082 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2083 */ 2084 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2085 { 2086 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2087 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2088 Mat *local,M,Mreuse; 2089 PetscScalar *vwork,*aa; 2090 MPI_Comm comm = mat->comm; 2091 Mat_SeqAIJ *aij; 2092 2093 2094 PetscFunctionBegin; 2095 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2096 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2097 2098 if (call == MAT_REUSE_MATRIX) { 2099 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2100 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 2101 local = &Mreuse; 2102 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2103 } else { 2104 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2105 Mreuse = *local; 2106 ierr = PetscFree(local);CHKERRQ(ierr); 2107 } 2108 2109 /* 2110 m - number of local rows 2111 n - number of columns (same on all processors) 2112 rstart - first row in new global matrix generated 2113 */ 2114 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2115 if (call == MAT_INITIAL_MATRIX) { 2116 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2117 ii = aij->i; 2118 jj = aij->j; 2119 2120 /* 2121 Determine the number of non-zeros in the diagonal and off-diagonal 2122 portions of the matrix in order to do correct preallocation 2123 */ 2124 2125 /* first get start and end of "diagonal" columns */ 2126 if (csize == PETSC_DECIDE) { 2127 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2128 if (mglobal == n) { /* square matrix */ 2129 nlocal = m; 2130 } else { 2131 nlocal = n/size + ((n % size) > rank); 2132 } 2133 } else { 2134 nlocal = csize; 2135 } 2136 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2137 rstart = rend - nlocal; 2138 if (rank == size - 1 && rend != n) { 2139 SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n); 2140 } 2141 2142 /* next, compute all the lengths */ 2143 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2144 olens = dlens + m; 2145 for (i=0; i<m; i++) { 2146 jend = ii[i+1] - ii[i]; 2147 olen = 0; 2148 dlen = 0; 2149 for (j=0; j<jend; j++) { 2150 if (*jj < rstart || *jj >= rend) olen++; 2151 else dlen++; 2152 jj++; 2153 } 2154 olens[i] = olen; 2155 dlens[i] = dlen; 2156 } 2157 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2158 ierr = PetscFree(dlens);CHKERRQ(ierr); 2159 } else { 2160 int ml,nl; 2161 2162 M = *newmat; 2163 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2164 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2165 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2166 /* 2167 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2168 rather than the slower MatSetValues(). 2169 */ 2170 M->was_assembled = PETSC_TRUE; 2171 M->assembled = PETSC_FALSE; 2172 } 2173 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2174 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2175 ii = aij->i; 2176 jj = aij->j; 2177 aa = aij->a; 2178 for (i=0; i<m; i++) { 2179 row = rstart + i; 2180 nz = ii[i+1] - ii[i]; 2181 cwork = jj; jj += nz; 2182 vwork = aa; aa += nz; 2183 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2184 } 2185 2186 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2187 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2188 *newmat = M; 2189 2190 /* save submatrix used in processor for next request */ 2191 if (call == MAT_INITIAL_MATRIX) { 2192 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2193 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2194 } 2195 2196 PetscFunctionReturn(0); 2197 } 2198 2199 #undef __FUNCT__ 2200 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2201 /*@C 2202 MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format 2203 (the default parallel PETSc format). For good matrix assembly performance 2204 the user should preallocate the matrix storage by setting the parameters 2205 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2206 performance can be increased by more than a factor of 50. 2207 2208 Collective on MPI_Comm 2209 2210 Input Parameters: 2211 + A - the matrix 2212 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2213 (same value is used for all local rows) 2214 . d_nnz - array containing the number of nonzeros in the various rows of the 2215 DIAGONAL portion of the local submatrix (possibly different for each row) 2216 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2217 The size of this array is equal to the number of local rows, i.e 'm'. 2218 You must leave room for the diagonal entry even if it is zero. 2219 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2220 submatrix (same value is used for all local rows). 2221 - o_nnz - array containing the number of nonzeros in the various rows of the 2222 OFF-DIAGONAL portion of the local submatrix (possibly different for 2223 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2224 structure. The size of this array is equal to the number 2225 of local rows, i.e 'm'. 2226 2227 The AIJ format (also called the Yale sparse matrix format or 2228 compressed row storage), is fully compatible with standard Fortran 77 2229 storage. That is, the stored row and column indices can begin at 2230 either one (as in Fortran) or zero. See the users manual for details. 2231 2232 The user MUST specify either the local or global matrix dimensions 2233 (possibly both). 2234 2235 The parallel matrix is partitioned such that the first m0 rows belong to 2236 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2237 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2238 2239 The DIAGONAL portion of the local submatrix of a processor can be defined 2240 as the submatrix which is obtained by extraction the part corresponding 2241 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2242 first row that belongs to the processor, and r2 is the last row belonging 2243 to the this processor. This is a square mxm matrix. The remaining portion 2244 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2245 2246 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2247 2248 By default, this format uses inodes (identical nodes) when possible. 2249 We search for consecutive rows with the same nonzero structure, thereby 2250 reusing matrix information to achieve increased efficiency. 2251 2252 Options Database Keys: 2253 + -mat_aij_no_inode - Do not use inodes 2254 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2255 - -mat_aij_oneindex - Internally use indexing starting at 1 2256 rather than 0. Note that when calling MatSetValues(), 2257 the user still MUST index entries starting at 0! 2258 2259 Example usage: 2260 2261 Consider the following 8x8 matrix with 34 non-zero values, that is 2262 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2263 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2264 as follows: 2265 2266 .vb 2267 1 2 0 | 0 3 0 | 0 4 2268 Proc0 0 5 6 | 7 0 0 | 8 0 2269 9 0 10 | 11 0 0 | 12 0 2270 ------------------------------------- 2271 13 0 14 | 15 16 17 | 0 0 2272 Proc1 0 18 0 | 19 20 21 | 0 0 2273 0 0 0 | 22 23 0 | 24 0 2274 ------------------------------------- 2275 Proc2 25 26 27 | 0 0 28 | 29 0 2276 30 0 0 | 31 32 33 | 0 34 2277 .ve 2278 2279 This can be represented as a collection of submatrices as: 2280 2281 .vb 2282 A B C 2283 D E F 2284 G H I 2285 .ve 2286 2287 Where the submatrices A,B,C are owned by proc0, D,E,F are 2288 owned by proc1, G,H,I are owned by proc2. 2289 2290 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2291 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2292 The 'M','N' parameters are 8,8, and have the same values on all procs. 2293 2294 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2295 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2296 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2297 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2298 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2299 matrix, ans [DF] as another SeqAIJ matrix. 2300 2301 When d_nz, o_nz parameters are specified, d_nz storage elements are 2302 allocated for every row of the local diagonal submatrix, and o_nz 2303 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2304 One way to choose d_nz and o_nz is to use the max nonzerors per local 2305 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2306 In this case, the values of d_nz,o_nz are: 2307 .vb 2308 proc0 : dnz = 2, o_nz = 2 2309 proc1 : dnz = 3, o_nz = 2 2310 proc2 : dnz = 1, o_nz = 4 2311 .ve 2312 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2313 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2314 for proc3. i.e we are using 12+15+10=37 storage locations to store 2315 34 values. 2316 2317 When d_nnz, o_nnz parameters are specified, the storage is specified 2318 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2319 In the above case the values for d_nnz,o_nnz are: 2320 .vb 2321 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2322 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2323 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2324 .ve 2325 Here the space allocated is sum of all the above values i.e 34, and 2326 hence pre-allocation is perfect. 2327 2328 Level: intermediate 2329 2330 .keywords: matrix, aij, compressed row, sparse, parallel 2331 2332 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2333 @*/ 2334 int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 2335 { 2336 int ierr,(*f)(Mat,int,const int[],int,const int[]); 2337 2338 PetscFunctionBegin; 2339 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2340 if (f) { 2341 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2342 } 2343 PetscFunctionReturn(0); 2344 } 2345 2346 #undef __FUNCT__ 2347 #define __FUNCT__ "MatCreateMPIAIJ" 2348 /*@C 2349 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2350 (the default parallel PETSc format). For good matrix assembly performance 2351 the user should preallocate the matrix storage by setting the parameters 2352 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2353 performance can be increased by more than a factor of 50. 2354 2355 Collective on MPI_Comm 2356 2357 Input Parameters: 2358 + comm - MPI communicator 2359 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2360 This value should be the same as the local size used in creating the 2361 y vector for the matrix-vector product y = Ax. 2362 . n - This value should be the same as the local size used in creating the 2363 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2364 calculated if N is given) For square matrices n is almost always m. 2365 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2366 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2367 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2368 (same value is used for all local rows) 2369 . d_nnz - array containing the number of nonzeros in the various rows of the 2370 DIAGONAL portion of the local submatrix (possibly different for each row) 2371 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2372 The size of this array is equal to the number of local rows, i.e 'm'. 2373 You must leave room for the diagonal entry even if it is zero. 2374 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2375 submatrix (same value is used for all local rows). 2376 - o_nnz - array containing the number of nonzeros in the various rows of the 2377 OFF-DIAGONAL portion of the local submatrix (possibly different for 2378 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2379 structure. The size of this array is equal to the number 2380 of local rows, i.e 'm'. 2381 2382 Output Parameter: 2383 . A - the matrix 2384 2385 Notes: 2386 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2387 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2388 storage requirements for this matrix. 2389 2390 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2391 processor than it must be used on all processors that share the object for 2392 that argument. 2393 2394 The AIJ format (also called the Yale sparse matrix format or 2395 compressed row storage), is fully compatible with standard Fortran 77 2396 storage. That is, the stored row and column indices can begin at 2397 either one (as in Fortran) or zero. See the users manual for details. 2398 2399 The user MUST specify either the local or global matrix dimensions 2400 (possibly both). 2401 2402 The parallel matrix is partitioned such that the first m0 rows belong to 2403 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2404 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2405 2406 The DIAGONAL portion of the local submatrix of a processor can be defined 2407 as the submatrix which is obtained by extraction the part corresponding 2408 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2409 first row that belongs to the processor, and r2 is the last row belonging 2410 to the this processor. This is a square mxm matrix. The remaining portion 2411 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2412 2413 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2414 2415 When calling this routine with a single process communicator, a matrix of 2416 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2417 type of communicator, use the construction mechanism: 2418 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2419 2420 By default, this format uses inodes (identical nodes) when possible. 2421 We search for consecutive rows with the same nonzero structure, thereby 2422 reusing matrix information to achieve increased efficiency. 2423 2424 Options Database Keys: 2425 + -mat_aij_no_inode - Do not use inodes 2426 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2427 - -mat_aij_oneindex - Internally use indexing starting at 1 2428 rather than 0. Note that when calling MatSetValues(), 2429 the user still MUST index entries starting at 0! 2430 2431 2432 Example usage: 2433 2434 Consider the following 8x8 matrix with 34 non-zero values, that is 2435 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2436 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2437 as follows: 2438 2439 .vb 2440 1 2 0 | 0 3 0 | 0 4 2441 Proc0 0 5 6 | 7 0 0 | 8 0 2442 9 0 10 | 11 0 0 | 12 0 2443 ------------------------------------- 2444 13 0 14 | 15 16 17 | 0 0 2445 Proc1 0 18 0 | 19 20 21 | 0 0 2446 0 0 0 | 22 23 0 | 24 0 2447 ------------------------------------- 2448 Proc2 25 26 27 | 0 0 28 | 29 0 2449 30 0 0 | 31 32 33 | 0 34 2450 .ve 2451 2452 This can be represented as a collection of submatrices as: 2453 2454 .vb 2455 A B C 2456 D E F 2457 G H I 2458 .ve 2459 2460 Where the submatrices A,B,C are owned by proc0, D,E,F are 2461 owned by proc1, G,H,I are owned by proc2. 2462 2463 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2464 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2465 The 'M','N' parameters are 8,8, and have the same values on all procs. 2466 2467 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2468 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2469 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2470 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2471 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2472 matrix, ans [DF] as another SeqAIJ matrix. 2473 2474 When d_nz, o_nz parameters are specified, d_nz storage elements are 2475 allocated for every row of the local diagonal submatrix, and o_nz 2476 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2477 One way to choose d_nz and o_nz is to use the max nonzerors per local 2478 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2479 In this case, the values of d_nz,o_nz are: 2480 .vb 2481 proc0 : dnz = 2, o_nz = 2 2482 proc1 : dnz = 3, o_nz = 2 2483 proc2 : dnz = 1, o_nz = 4 2484 .ve 2485 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2486 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2487 for proc3. i.e we are using 12+15+10=37 storage locations to store 2488 34 values. 2489 2490 When d_nnz, o_nnz parameters are specified, the storage is specified 2491 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2492 In the above case the values for d_nnz,o_nnz are: 2493 .vb 2494 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2495 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2496 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2497 .ve 2498 Here the space allocated is sum of all the above values i.e 34, and 2499 hence pre-allocation is perfect. 2500 2501 Level: intermediate 2502 2503 .keywords: matrix, aij, compressed row, sparse, parallel 2504 2505 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2506 @*/ 2507 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 2508 { 2509 int ierr,size; 2510 2511 PetscFunctionBegin; 2512 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2513 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2514 if (size > 1) { 2515 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2516 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2517 } else { 2518 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2519 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2520 } 2521 PetscFunctionReturn(0); 2522 } 2523 2524 #undef __FUNCT__ 2525 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2526 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[]) 2527 { 2528 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2529 PetscFunctionBegin; 2530 *Ad = a->A; 2531 *Ao = a->B; 2532 *colmap = a->garray; 2533 PetscFunctionReturn(0); 2534 } 2535 2536 #undef __FUNCT__ 2537 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2538 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2539 { 2540 int ierr,i; 2541 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2542 2543 PetscFunctionBegin; 2544 if (coloring->ctype == IS_COLORING_LOCAL) { 2545 ISColoringValue *allcolors,*colors; 2546 ISColoring ocoloring; 2547 2548 /* set coloring for diagonal portion */ 2549 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2550 2551 /* set coloring for off-diagonal portion */ 2552 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2553 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2554 for (i=0; i<a->B->n; i++) { 2555 colors[i] = allcolors[a->garray[i]]; 2556 } 2557 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2558 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2559 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2560 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2561 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2562 ISColoringValue *colors; 2563 int *larray; 2564 ISColoring ocoloring; 2565 2566 /* set coloring for diagonal portion */ 2567 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2568 for (i=0; i<a->A->n; i++) { 2569 larray[i] = i + a->cstart; 2570 } 2571 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2572 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2573 for (i=0; i<a->A->n; i++) { 2574 colors[i] = coloring->colors[larray[i]]; 2575 } 2576 ierr = PetscFree(larray);CHKERRQ(ierr); 2577 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2578 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2579 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2580 2581 /* set coloring for off-diagonal portion */ 2582 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2583 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2584 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2585 for (i=0; i<a->B->n; i++) { 2586 colors[i] = coloring->colors[larray[i]]; 2587 } 2588 ierr = PetscFree(larray);CHKERRQ(ierr); 2589 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2590 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2591 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2592 } else { 2593 SETERRQ1(1,"No support ISColoringType %d",coloring->ctype); 2594 } 2595 2596 PetscFunctionReturn(0); 2597 } 2598 2599 #undef __FUNCT__ 2600 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2601 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2602 { 2603 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2604 int ierr; 2605 2606 PetscFunctionBegin; 2607 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2608 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2609 PetscFunctionReturn(0); 2610 } 2611 2612 #undef __FUNCT__ 2613 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2614 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2615 { 2616 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2617 int ierr; 2618 2619 PetscFunctionBegin; 2620 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2621 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2622 PetscFunctionReturn(0); 2623 } 2624 2625 #undef __FUNCT__ 2626 #define __FUNCT__ "MatMerge" 2627 /*@C 2628 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2629 matrices from each processor 2630 2631 Collective on MPI_Comm 2632 2633 Input Parameters: 2634 + comm - the communicators the parallel matrix will live on 2635 - inmat - the input sequential matrices 2636 2637 Output Parameter: 2638 . outmat - the parallel matrix generated 2639 2640 Level: advanced 2641 2642 Notes: The number of columns of the matrix in EACH of the seperate files 2643 MUST be the same. 2644 2645 @*/ 2646 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat) 2647 { 2648 int ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz; 2649 PetscScalar *values; 2650 PetscMap columnmap,rowmap; 2651 2652 PetscFunctionBegin; 2653 2654 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr); 2655 2656 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2657 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2658 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr); 2659 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2660 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2661 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2662 2663 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2664 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2665 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2666 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2667 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2668 2669 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2670 for (i=0;i<m;i++) { 2671 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2672 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2673 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2674 } 2675 ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr); 2676 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2677 2678 for (i=0;i<m;i++) { 2679 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2680 I = i + rstart; 2681 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2682 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2683 } 2684 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2685 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2686 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2687 2688 PetscFunctionReturn(0); 2689 } 2690 2691 #undef __FUNCT__ 2692 #define __FUNCT__ "MatFileSplit" 2693 int MatFileSplit(Mat A,char *outfile) 2694 { 2695 int ierr,rank,len,m,N,i,rstart,*indx,nnz; 2696 PetscViewer out; 2697 char *name; 2698 Mat B; 2699 PetscScalar *values; 2700 2701 PetscFunctionBegin; 2702 2703 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2704 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2705 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr); 2706 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2707 for (i=0;i<m;i++) { 2708 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2709 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2710 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2711 } 2712 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2713 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2714 2715 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2716 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2717 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2718 sprintf(name,"%s.%d",outfile,rank); 2719 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr); 2720 ierr = PetscFree(name); 2721 ierr = MatView(B,out);CHKERRQ(ierr); 2722 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2723 ierr = MatDestroy(B);CHKERRQ(ierr); 2724 PetscFunctionReturn(0); 2725 } 2726