1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/ 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/spops.h" 6 7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat); 8 EXTERN int DisAssemble_MPIAIJ(Mat); 9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 12 EXTERN int MatPrintHelp_SeqAIJ(Mat); 13 14 /* 15 Local utility routine that creates a mapping from the global column 16 number to the local number in the off-diagonal part of the local 17 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 18 a slightly higher hash table cost; without it it is not scalable (each processor 19 has an order N integer array but is fast to acess. 20 */ 21 #undef __FUNCT__ 22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 23 int CreateColmap_MPIAIJ_Private(Mat mat) 24 { 25 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 26 int n = aij->B->n,i,ierr; 27 28 PetscFunctionBegin; 29 #if defined (PETSC_USE_CTABLE) 30 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 31 for (i=0; i<n; i++){ 32 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 33 } 34 #else 35 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 36 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 37 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 38 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 39 #endif 40 PetscFunctionReturn(0); 41 } 42 43 #define CHUNKSIZE 15 44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 45 { \ 46 \ 47 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 48 rmax = aimax[row]; nrow = ailen[row]; \ 49 col1 = col - shift; \ 50 \ 51 low = 0; high = nrow; \ 52 while (high-low > 5) { \ 53 t = (low+high)/2; \ 54 if (rp[t] > col) high = t; \ 55 else low = t; \ 56 } \ 57 for (_i=low; _i<high; _i++) { \ 58 if (rp[_i] > col1) break; \ 59 if (rp[_i] == col1) { \ 60 if (addv == ADD_VALUES) ap[_i] += value; \ 61 else ap[_i] = value; \ 62 goto a_noinsert; \ 63 } \ 64 } \ 65 if (nonew == 1) goto a_noinsert; \ 66 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 67 if (nrow >= rmax) { \ 68 /* there is no extra room in row, therefore enlarge */ \ 69 int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 70 PetscScalar *new_a; \ 71 \ 72 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 73 \ 74 /* malloc new storage space */ \ 75 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \ 76 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 77 new_j = (int*)(new_a + new_nz); \ 78 new_i = new_j + new_nz; \ 79 \ 80 /* copy over old data into new slots */ \ 81 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 82 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 83 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 84 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 85 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 86 len*sizeof(int));CHKERRQ(ierr); \ 87 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 88 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 89 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 90 /* free up old matrix storage */ \ 91 \ 92 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 93 if 
(!a->singlemalloc) { \ 94 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 95 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 96 } \ 97 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 98 a->singlemalloc = PETSC_TRUE; \ 99 \ 100 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 101 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 102 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 103 a->maxnz += CHUNKSIZE; \ 104 a->reallocs++; \ 105 } \ 106 N = nrow++ - 1; a->nz++; \ 107 /* shift up all the later entries in this row */ \ 108 for (ii=N; ii>=_i; ii--) { \ 109 rp[ii+1] = rp[ii]; \ 110 ap[ii+1] = ap[ii]; \ 111 } \ 112 rp[_i] = col1; \ 113 ap[_i] = value; \ 114 a_noinsert: ; \ 115 ailen[row] = nrow; \ 116 } 117 118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 119 { \ 120 \ 121 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 122 rmax = bimax[row]; nrow = bilen[row]; \ 123 col1 = col - shift; \ 124 \ 125 low = 0; high = nrow; \ 126 while (high-low > 5) { \ 127 t = (low+high)/2; \ 128 if (rp[t] > col) high = t; \ 129 else low = t; \ 130 } \ 131 for (_i=low; _i<high; _i++) { \ 132 if (rp[_i] > col1) break; \ 133 if (rp[_i] == col1) { \ 134 if (addv == ADD_VALUES) ap[_i] += value; \ 135 else ap[_i] = value; \ 136 goto b_noinsert; \ 137 } \ 138 } \ 139 if (nonew == 1) goto b_noinsert; \ 140 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 141 if (nrow >= rmax) { \ 142 /* there is no extra room in row, therefore enlarge */ \ 143 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 144 PetscScalar *new_a; \ 145 \ 146 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 147 \ 148 /* malloc new storage space */ \ 149 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 150 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 151 new_j = (int*)(new_a + new_nz); \ 152 new_i = new_j + new_nz; \ 153 \ 154 /* copy over old data into new slots */ \ 155 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 156 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 157 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 158 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 159 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 160 len*sizeof(int));CHKERRQ(ierr); \ 161 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 163 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 164 /* free up old matrix storage */ \ 165 \ 166 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 167 if (!b->singlemalloc) { \ 168 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 169 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 170 } \ 171 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 172 b->singlemalloc = PETSC_TRUE; \ 173 \ 174 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 175 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 176 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 177 b->maxnz += CHUNKSIZE; \ 178 b->reallocs++; \ 179 } \ 180 N = nrow++ - 1; b->nz++; \ 181 /* shift up all the later entries in this row */ \ 182 for (ii=N; ii>=_i; ii--) { \ 183 rp[ii+1] = rp[ii]; \ 184 ap[ii+1] = ap[ii]; \ 185 } \ 186 rp[_i] = col1; \ 187 ap[_i] = value; \ 188 b_noinsert: ; \ 189 bilen[row] = nrow; \ 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatSetValues_MPIAIJ" 194 int MatSetValues_MPIAIJ(Mat 
mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 195 { 196 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 197 PetscScalar value; 198 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 199 int cstart = aij->cstart,cend = aij->cend,row,col; 200 PetscTruth roworiented = aij->roworiented; 201 202 /* Some Variables required in the macro */ 203 Mat A = aij->A; 204 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 205 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 206 PetscScalar *aa = a->a; 207 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 208 Mat B = aij->B; 209 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 210 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 211 PetscScalar *ba = b->a; 212 213 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 214 int nonew = a->nonew,shift=0; 215 PetscScalar *ap; 216 217 PetscFunctionBegin; 218 for (i=0; i<m; i++) { 219 if (im[i] < 0) continue; 220 #if defined(PETSC_USE_BOPT_g) 221 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 222 #endif 223 if (im[i] >= rstart && im[i] < rend) { 224 row = im[i] - rstart; 225 for (j=0; j<n; j++) { 226 if (in[j] >= cstart && in[j] < cend){ 227 col = in[j] - cstart; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 if (ignorezeroentries && value == 0.0) continue; 230 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } else if (in[j] < 0) continue; 233 #if defined(PETSC_USE_BOPT_g) 234 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 235 #endif 236 else { 237 if (mat->was_assembled) { 238 if (!aij->colmap) { 239 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 240 } 241 #if defined (PETSC_USE_CTABLE) 242 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 243 col--; 244 #else 245 col = aij->colmap[in[j]] - 1; 246 #endif 247 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 248 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 249 col = in[j]; 250 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 251 B = aij->B; 252 b = (Mat_SeqAIJ*)B->data; 253 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 254 ba = b->a; 255 } 256 } else col = in[j]; 257 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 258 if (ignorezeroentries && value == 0.0) continue; 259 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 260 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 261 } 262 } 263 } else { 264 if (!aij->donotstash) { 265 if (roworiented) { 266 if (ignorezeroentries && v[i*n] == 0.0) continue; 267 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 268 } else { 269 if (ignorezeroentries && v[i] == 0.0) continue; 270 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 271 } 272 } 273 } 274 } 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatGetValues_MPIAIJ" 280 int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 281 { 282 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 283 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 284 int cstart = aij->cstart,cend = aij->cend,row,col; 285 286 PetscFunctionBegin; 287 for (i=0; i<m; i++) { 288 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative 
row: %d",idxm[i]); 289 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 290 if (idxm[i] >= rstart && idxm[i] < rend) { 291 row = idxm[i] - rstart; 292 for (j=0; j<n; j++) { 293 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]); 294 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 295 if (idxn[j] >= cstart && idxn[j] < cend){ 296 col = idxn[j] - cstart; 297 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 298 } else { 299 if (!aij->colmap) { 300 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 301 } 302 #if defined (PETSC_USE_CTABLE) 303 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 304 col --; 305 #else 306 col = aij->colmap[idxn[j]] - 1; 307 #endif 308 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 309 else { 310 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 311 } 312 } 313 } 314 } else { 315 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 316 } 317 } 318 PetscFunctionReturn(0); 319 } 320 321 #undef __FUNCT__ 322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 324 { 325 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 326 int ierr,nstash,reallocs; 327 InsertMode addv; 328 329 PetscFunctionBegin; 330 if (aij->donotstash) { 331 PetscFunctionReturn(0); 332 } 333 334 /* make sure all processors are either in INSERTMODE or ADDMODE */ 335 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 336 if (addv == (ADD_VALUES|INSERT_VALUES)) { 337 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 338 } 339 mat->insertmode = addv; /* in case this processor had no cache */ 340 341 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 342 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 343 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 344 PetscFunctionReturn(0); 345 } 346 347 348 #undef __FUNCT__ 349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 351 { 352 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 353 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 354 int i,j,rstart,ncols,n,ierr,flg; 355 int *row,*col,other_disassembled; 356 PetscScalar *val; 357 InsertMode addv = mat->insertmode; 358 359 PetscFunctionBegin; 360 if (!aij->donotstash) { 361 while (1) { 362 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 363 if (!flg) break; 364 365 for (i=0; i<n;) { 366 /* Now identify the consecutive vals belonging to the same row */ 367 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 368 if (j < n) ncols = j-i; 369 else ncols = n-i; 370 /* Now assemble all these values with a single function call */ 371 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 372 i = j; 373 } 374 } 375 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 376 } 377 378 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 379 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 380 381 /* determine if any processor has disassembled, if so we must 382 also disassemble ourselfs, in order that we may reassemble. 
*/ 383 /* 384 if nonzero structure of submatrix B cannot change then we know that 385 no processor disassembled thus we can skip this stuff 386 */ 387 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 388 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 389 if (mat->was_assembled && !other_disassembled) { 390 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 391 } 392 } 393 394 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 395 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 396 } 397 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 398 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 399 400 if (aij->rowvalues) { 401 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 402 aij->rowvalues = 0; 403 } 404 405 /* used by MatAXPY() */ 406 a->xtoy = 0; b->xtoy = 0; 407 a->XtoY = 0; b->XtoY = 0; 408 409 PetscFunctionReturn(0); 410 } 411 412 #undef __FUNCT__ 413 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 414 int MatZeroEntries_MPIAIJ(Mat A) 415 { 416 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 417 int ierr; 418 419 PetscFunctionBegin; 420 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 421 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 422 PetscFunctionReturn(0); 423 } 424 425 #undef __FUNCT__ 426 #define __FUNCT__ "MatZeroRows_MPIAIJ" 427 int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 428 { 429 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 430 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 431 int *nprocs,j,idx,nsends,row; 432 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 433 int *rvalues,tag = A->tag,count,base,slen,n,*source; 434 int *lens,imdex,*lrows,*values,rstart=l->rstart; 435 MPI_Comm comm = A->comm; 436 MPI_Request *send_waits,*recv_waits; 437 MPI_Status recv_status,*send_status; 438 IS istmp; 439 PetscTruth found; 440 441 PetscFunctionBegin; 442 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 443 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 444 445 /* first count number of contributors to each processor */ 446 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 447 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 448 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 449 for (i=0; i<N; i++) { 450 idx = rows[i]; 451 found = PETSC_FALSE; 452 for (j=0; j<size; j++) { 453 if (idx >= owners[j] && idx < owners[j+1]) { 454 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 455 } 456 } 457 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 458 } 459 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 460 461 /* inform other processors of number of messages and max length*/ 462 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 463 464 /* post receives: */ 465 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 466 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 467 for (i=0; i<nrecvs; i++) { 468 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 469 } 470 471 /* do sends: 472 1) starts[i] gives the starting index in svalues for stuff going to 473 the ith processor 474 */ 475 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 476 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 477 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 478 starts[0] = 0; 479 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 480 for (i=0; i<N; i++) { 481 svalues[starts[owner[i]]++] 
= rows[i]; 482 } 483 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 484 485 starts[0] = 0; 486 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 487 count = 0; 488 for (i=0; i<size; i++) { 489 if (nprocs[2*i+1]) { 490 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 491 } 492 } 493 ierr = PetscFree(starts);CHKERRQ(ierr); 494 495 base = owners[rank]; 496 497 /* wait on receives */ 498 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 499 source = lens + nrecvs; 500 count = nrecvs; slen = 0; 501 while (count) { 502 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 503 /* unpack receives into our local space */ 504 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 505 source[imdex] = recv_status.MPI_SOURCE; 506 lens[imdex] = n; 507 slen += n; 508 count--; 509 } 510 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 511 512 /* move the data into the send scatter */ 513 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 514 count = 0; 515 for (i=0; i<nrecvs; i++) { 516 values = rvalues + i*nmax; 517 for (j=0; j<lens[i]; j++) { 518 lrows[count++] = values[j] - base; 519 } 520 } 521 ierr = PetscFree(rvalues);CHKERRQ(ierr); 522 ierr = PetscFree(lens);CHKERRQ(ierr); 523 ierr = PetscFree(owner);CHKERRQ(ierr); 524 ierr = PetscFree(nprocs);CHKERRQ(ierr); 525 526 /* actually zap the local rows */ 527 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 528 PetscLogObjectParent(A,istmp); 529 530 /* 531 Zero the required rows. If the "diagonal block" of the matrix 532 is square and the user wishes to set the diagonal we use seperate 533 code so that MatSetValues() is not called for each diagonal allocating 534 new memory, thus calling lots of mallocs and slowing things down. 
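     When the diagonal block is rectangular the entry (row,row) need not lie in
     l->A at all, so in that case the diagonal values are inserted through
     MatSetValues() on the parallel matrix followed by a fresh assembly, which is
     why the options forbidding new nonzero locations are rejected below.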
535 536 Contributed by: Mathew Knepley 537 */ 538 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 539 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 540 if (diag && (l->A->M == l->A->N)) { 541 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 542 } else if (diag) { 543 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 544 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 545 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 546 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 547 } 548 for (i = 0; i < slen; i++) { 549 row = lrows[i] + rstart; 550 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 551 } 552 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 553 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 554 } else { 555 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 556 } 557 ierr = ISDestroy(istmp);CHKERRQ(ierr); 558 ierr = PetscFree(lrows);CHKERRQ(ierr); 559 560 /* wait on sends */ 561 if (nsends) { 562 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 563 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 564 ierr = PetscFree(send_status);CHKERRQ(ierr); 565 } 566 ierr = PetscFree(send_waits);CHKERRQ(ierr); 567 ierr = PetscFree(svalues);CHKERRQ(ierr); 568 569 PetscFunctionReturn(0); 570 } 571 572 #undef __FUNCT__ 573 #define __FUNCT__ "MatMult_MPIAIJ" 574 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 575 { 576 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 577 int ierr,nt; 578 579 PetscFunctionBegin; 580 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 581 if (nt != A->n) { 582 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 583 } 584 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 585 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 586 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 587 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 588 PetscFunctionReturn(0); 589 } 590 591 #undef __FUNCT__ 592 #define __FUNCT__ "MatMultAdd_MPIAIJ" 593 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 594 { 595 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 596 int ierr; 597 598 PetscFunctionBegin; 599 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 600 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 601 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 602 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 603 PetscFunctionReturn(0); 604 } 605 606 #undef __FUNCT__ 607 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 608 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 609 { 610 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 611 int ierr; 612 613 PetscFunctionBegin; 614 /* do nondiagonal part */ 615 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 616 /* send it on its way */ 617 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 618 /* do local part */ 619 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 620 /* receive remote parts: note this assumes the values are not actually */ 621 /* inserted in yy until the next line, which is true for my implementation*/ 622 /* but is not perhaps always true. 
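     (The local product above overwrites yy, so the contributions gathered from
     a->lvec must only be added into yy when VecScatterEnd() completes; a scatter
     implementation that added them during VecScatterBegin() would lose them.)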
*/ 623 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 624 PetscFunctionReturn(0); 625 } 626 627 EXTERN_C_BEGIN 628 #undef __FUNCT__ 629 #define __FUNCT__ "MatIsSymmetric_MPIAIJ" 630 int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f) 631 { 632 MPI_Comm comm; 633 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 634 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 635 IS Me,Notme; 636 int M,N,first,last,*notme,ntids,i, ierr; 637 638 PetscFunctionBegin; 639 640 /* Easy test: symmetric diagonal block */ 641 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 642 ierr = MatIsSymmetric(Adia,Bdia,f); CHKERRQ(ierr); 643 if (!*f) PetscFunctionReturn(0); 644 ierr = PetscObjectGetComm((PetscObject)Amat,&comm); CHKERRQ(ierr); 645 ierr = MPI_Comm_size(comm,&ntids); CHKERRQ(ierr); 646 if (ntids==1) PetscFunctionReturn(0); 647 648 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 649 ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr); 650 ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr); 651 ierr = PetscMalloc((N-last+first)*sizeof(int),¬me); CHKERRQ(ierr); 652 for (i=0; i<first; i++) notme[i] = i; 653 for (i=last; i<M; i++) notme[i-last+first] = i; 654 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr); 655 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr); 656 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs); CHKERRQ(ierr); 657 Aoff = Aoffs[0]; 658 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs); CHKERRQ(ierr); 659 Boff = Boffs[0]; 660 ierr = MatIsSymmetric(Aoff,Boff,f); CHKERRQ(ierr); 661 ierr = MatDestroyMatrices(1,&Aoffs); CHKERRQ(ierr); 662 ierr = MatDestroyMatrices(1,&Boffs); CHKERRQ(ierr); 663 ierr = ISDestroy(Me); CHKERRQ(ierr); 664 ierr = ISDestroy(Notme); CHKERRQ(ierr); 665 666 PetscFunctionReturn(0); 667 } 668 EXTERN_C_END 669 670 #undef __FUNCT__ 671 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 672 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 673 { 674 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 675 int ierr; 676 677 PetscFunctionBegin; 678 /* do nondiagonal part */ 679 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 680 /* send it on its way */ 681 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 682 /* do local part */ 683 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 684 /* receive remote parts: note this assumes the values are not actually */ 685 /* inserted in yy until the next line, which is true for my implementation*/ 686 /* but is not perhaps always true. 
*/ 687 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 688 PetscFunctionReturn(0); 689 } 690 691 /* 692 This only works correctly for square matrices where the subblock A->A is the 693 diagonal block 694 */ 695 #undef __FUNCT__ 696 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 697 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 698 { 699 int ierr; 700 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 701 702 PetscFunctionBegin; 703 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 704 if (a->rstart != a->cstart || a->rend != a->cend) { 705 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 706 } 707 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 708 PetscFunctionReturn(0); 709 } 710 711 #undef __FUNCT__ 712 #define __FUNCT__ "MatScale_MPIAIJ" 713 int MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 714 { 715 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 716 int ierr; 717 718 PetscFunctionBegin; 719 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 720 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 721 PetscFunctionReturn(0); 722 } 723 724 #undef __FUNCT__ 725 #define __FUNCT__ "MatDestroy_MPIAIJ" 726 int MatDestroy_MPIAIJ(Mat mat) 727 { 728 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 729 int ierr; 730 731 PetscFunctionBegin; 732 #if defined(PETSC_USE_LOG) 733 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 734 #endif 735 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 736 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 737 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 738 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 739 #if defined (PETSC_USE_CTABLE) 740 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 741 #else 742 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 743 #endif 744 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 745 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 746 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 747 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 748 ierr = PetscFree(aij);CHKERRQ(ierr); 749 PetscFunctionReturn(0); 750 } 751 752 #undef __FUNCT__ 753 #define __FUNCT__ "MatView_MPIAIJ_Binary" 754 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 755 { 756 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 757 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 758 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 759 int nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag; 760 int nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 761 PetscScalar *column_values; 762 763 PetscFunctionBegin; 764 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 765 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 766 nz = A->nz + B->nz; 767 if (rank == 0) { 768 header[0] = MAT_FILE_COOKIE; 769 header[1] = mat->M; 770 header[2] = mat->N; 771 ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 772 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 773 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr); 774 /* get largest number of rows any processor has */ 775 rlen = mat->m; 776 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 777 for (i=1; i<size; i++) { 778 rlen = PetscMax(rlen,range[i+1] - range[i]); 779 } 780 } else { 781 ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 782 rlen = mat->m; 783 } 784 785 /* load up the local row counts */ 786 ierr = 
PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr); 787 for (i=0; i<mat->m; i++) { 788 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 789 } 790 791 /* store the row lengths to the file */ 792 if (rank == 0) { 793 MPI_Status status; 794 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr); 795 for (i=1; i<size; i++) { 796 rlen = range[i+1] - range[i]; 797 ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 798 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr); 799 } 800 } else { 801 ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 802 } 803 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 804 805 /* load up the local column indices */ 806 nzmax = nz; /* )th processor needs space a largest processor needs */ 807 ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 808 ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr); 809 cnt = 0; 810 for (i=0; i<mat->m; i++) { 811 for (j=B->i[i]; j<B->i[i+1]; j++) { 812 if ( (col = garray[B->j[j]]) > cstart) break; 813 column_indices[cnt++] = col; 814 } 815 for (k=A->i[i]; k<A->i[i+1]; k++) { 816 column_indices[cnt++] = A->j[k] + cstart; 817 } 818 for (; j<B->i[i+1]; j++) { 819 column_indices[cnt++] = garray[B->j[j]]; 820 } 821 } 822 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 823 824 /* store the column indices to the file */ 825 if (rank == 0) { 826 MPI_Status status; 827 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr); 828 for (i=1; i<size; i++) { 829 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 830 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 831 ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 832 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr); 833 } 834 } else { 835 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 836 ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 837 } 838 ierr = PetscFree(column_indices);CHKERRQ(ierr); 839 840 /* load up the local column values */ 841 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 842 cnt = 0; 843 for (i=0; i<mat->m; i++) { 844 for (j=B->i[i]; j<B->i[i+1]; j++) { 845 if ( garray[B->j[j]] > cstart) break; 846 column_values[cnt++] = B->a[j]; 847 } 848 for (k=A->i[i]; k<A->i[i+1]; k++) { 849 column_values[cnt++] = A->a[k]; 850 } 851 for (; j<B->i[i+1]; j++) { 852 column_values[cnt++] = B->a[j]; 853 } 854 } 855 if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 856 857 /* store the column values to the file */ 858 if (rank == 0) { 859 MPI_Status status; 860 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr); 861 for (i=1; i<size; i++) { 862 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 863 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 864 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 865 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr); 866 } 867 } else { 868 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 869 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 870 } 871 ierr = PetscFree(column_values);CHKERRQ(ierr); 872 
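  /*
     Summary of the binary layout produced above (gathered to process 0 and
     written in rank order): a 4 entry header (MAT_FILE_COOKIE, M, N, global
     number of nonzeros), then the row lengths of every row, then the global
     column indices of every row (off-diagonal entries mapped back through
     garray, diagonal-block entries shifted by cstart), then the matching
     nonzero values.
  */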
PetscFunctionReturn(0); 873 } 874 875 #undef __FUNCT__ 876 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 877 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 878 { 879 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 880 int ierr,rank = aij->rank,size = aij->size; 881 PetscTruth isdraw,isascii,flg,isbinary; 882 PetscViewer sviewer; 883 PetscViewerFormat format; 884 885 PetscFunctionBegin; 886 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 887 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 888 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 889 if (isascii) { 890 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 891 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 892 MatInfo info; 893 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 894 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 895 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 896 if (flg) { 897 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 898 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 899 } else { 900 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 901 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 902 } 903 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 904 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 905 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 906 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 907 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 908 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 909 PetscFunctionReturn(0); 910 } else if (format == PETSC_VIEWER_ASCII_INFO) { 911 PetscFunctionReturn(0); 912 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 913 PetscFunctionReturn(0); 914 } 915 } else if (isbinary) { 916 if (size == 1) { 917 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 918 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 919 } else { 920 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 921 } 922 PetscFunctionReturn(0); 923 } else if (isdraw) { 924 PetscDraw draw; 925 PetscTruth isnull; 926 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 927 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 928 } 929 930 if (size == 1) { 931 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 932 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 933 } else { 934 /* assemble the entire matrix onto first processor. 
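       Every process copies its rows into a matrix whose rows all live on
       process 0, so this path is not memory scalable and is only intended
       for viewing modest sized matrices.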
*/ 935 Mat A; 936 Mat_SeqAIJ *Aloc; 937 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 938 PetscScalar *a; 939 940 if (!rank) { 941 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 942 } else { 943 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 944 } 945 PetscLogObjectParent(mat,A); 946 947 /* copy over the A part */ 948 Aloc = (Mat_SeqAIJ*)aij->A->data; 949 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 950 row = aij->rstart; 951 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 952 for (i=0; i<m; i++) { 953 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 954 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 955 } 956 aj = Aloc->j; 957 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 958 959 /* copy over the B part */ 960 Aloc = (Mat_SeqAIJ*)aij->B->data; 961 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 962 row = aij->rstart; 963 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 964 ct = cols; 965 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 966 for (i=0; i<m; i++) { 967 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 968 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 969 } 970 ierr = PetscFree(ct);CHKERRQ(ierr); 971 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 972 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 973 /* 974 Everyone has to call to draw the matrix since the graphics waits are 975 synchronized across all processors that share the PetscDraw object 976 */ 977 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 978 if (!rank) { 979 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 980 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 981 } 982 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 983 ierr = MatDestroy(A);CHKERRQ(ierr); 984 } 985 PetscFunctionReturn(0); 986 } 987 988 #undef __FUNCT__ 989 #define __FUNCT__ "MatView_MPIAIJ" 990 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 991 { 992 int ierr; 993 PetscTruth isascii,isdraw,issocket,isbinary; 994 995 PetscFunctionBegin; 996 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 997 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 998 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 999 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1000 if (isascii || isdraw || isbinary || issocket) { 1001 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1002 } else { 1003 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1004 } 1005 PetscFunctionReturn(0); 1006 } 1007 1008 1009 1010 #undef __FUNCT__ 1011 #define __FUNCT__ "MatRelax_MPIAIJ" 1012 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1013 { 1014 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1015 int ierr; 1016 Vec bb1; 1017 PetscScalar mone=-1.0; 1018 1019 PetscFunctionBegin; 1020 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1021 1022 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1023 1024 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1025 if (flag & SOR_ZERO_INITIAL_GUESS) { 1026 ierr = 
(*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1027 its--; 1028 } 1029 1030 while (its--) { 1031 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1032 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1033 1034 /* update rhs: bb1 = bb - B*x */ 1035 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1036 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1037 1038 /* local sweep */ 1039 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1040 CHKERRQ(ierr); 1041 } 1042 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1043 if (flag & SOR_ZERO_INITIAL_GUESS) { 1044 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1045 its--; 1046 } 1047 while (its--) { 1048 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1049 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1050 1051 /* update rhs: bb1 = bb - B*x */ 1052 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1053 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1054 1055 /* local sweep */ 1056 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1057 CHKERRQ(ierr); 1058 } 1059 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1060 if (flag & SOR_ZERO_INITIAL_GUESS) { 1061 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1062 its--; 1063 } 1064 while (its--) { 1065 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1066 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1067 1068 /* update rhs: bb1 = bb - B*x */ 1069 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1070 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1071 1072 /* local sweep */ 1073 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1074 CHKERRQ(ierr); 1075 } 1076 } else { 1077 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1078 } 1079 1080 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1081 PetscFunctionReturn(0); 1082 } 1083 1084 #undef __FUNCT__ 1085 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1086 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1087 { 1088 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1089 Mat A = mat->A,B = mat->B; 1090 int ierr; 1091 PetscReal isend[5],irecv[5]; 1092 1093 PetscFunctionBegin; 1094 info->block_size = 1.0; 1095 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1096 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1097 isend[3] = info->memory; isend[4] = info->mallocs; 1098 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1099 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1100 isend[3] += info->memory; isend[4] += info->mallocs; 1101 if (flag == MAT_LOCAL) { 1102 info->nz_used = isend[0]; 1103 info->nz_allocated = isend[1]; 1104 info->nz_unneeded = isend[2]; 1105 info->memory = isend[3]; 1106 info->mallocs = isend[4]; 1107 } else if (flag == MAT_GLOBAL_MAX) { 1108 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1109 info->nz_used = irecv[0]; 1110 info->nz_allocated = irecv[1]; 1111 info->nz_unneeded = irecv[2]; 1112 info->memory = irecv[3]; 1113 info->mallocs = irecv[4]; 1114 } else if (flag == 
MAT_GLOBAL_SUM) { 1115 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1116 info->nz_used = irecv[0]; 1117 info->nz_allocated = irecv[1]; 1118 info->nz_unneeded = irecv[2]; 1119 info->memory = irecv[3]; 1120 info->mallocs = irecv[4]; 1121 } 1122 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1123 info->fill_ratio_needed = 0; 1124 info->factor_mallocs = 0; 1125 info->rows_global = (double)matin->M; 1126 info->columns_global = (double)matin->N; 1127 info->rows_local = (double)matin->m; 1128 info->columns_local = (double)matin->N; 1129 1130 PetscFunctionReturn(0); 1131 } 1132 1133 #undef __FUNCT__ 1134 #define __FUNCT__ "MatSetOption_MPIAIJ" 1135 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1136 { 1137 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1138 int ierr; 1139 1140 PetscFunctionBegin; 1141 switch (op) { 1142 case MAT_NO_NEW_NONZERO_LOCATIONS: 1143 case MAT_YES_NEW_NONZERO_LOCATIONS: 1144 case MAT_COLUMNS_UNSORTED: 1145 case MAT_COLUMNS_SORTED: 1146 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1147 case MAT_KEEP_ZEROED_ROWS: 1148 case MAT_NEW_NONZERO_LOCATION_ERR: 1149 case MAT_USE_INODES: 1150 case MAT_DO_NOT_USE_INODES: 1151 case MAT_IGNORE_ZERO_ENTRIES: 1152 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1153 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1154 break; 1155 case MAT_ROW_ORIENTED: 1156 a->roworiented = PETSC_TRUE; 1157 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1158 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1159 break; 1160 case MAT_ROWS_SORTED: 1161 case MAT_ROWS_UNSORTED: 1162 case MAT_YES_NEW_DIAGONALS: 1163 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1164 break; 1165 case MAT_COLUMN_ORIENTED: 1166 a->roworiented = PETSC_FALSE; 1167 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1168 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1169 break; 1170 case MAT_IGNORE_OFF_PROC_ENTRIES: 1171 a->donotstash = PETSC_TRUE; 1172 break; 1173 case MAT_NO_NEW_DIAGONALS: 1174 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1175 default: 1176 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1177 } 1178 PetscFunctionReturn(0); 1179 } 1180 1181 #undef __FUNCT__ 1182 #define __FUNCT__ "MatGetRow_MPIAIJ" 1183 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1184 { 1185 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1186 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1187 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1188 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1189 int *cmap,*idx_p; 1190 1191 PetscFunctionBegin; 1192 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1193 mat->getrowactive = PETSC_TRUE; 1194 1195 if (!mat->rowvalues && (idx || v)) { 1196 /* 1197 allocate enough space to hold information from the longest row. 
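       A single malloc of max*(sizeof(int)+sizeof(PetscScalar)) is used: the
       first max PetscScalars hold the row values and rowindices points at the
       trailing max ints, so both work arrays share one buffer.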
1198 */ 1199 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1200 int max = 1,tmp; 1201 for (i=0; i<matin->m; i++) { 1202 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1203 if (max < tmp) { max = tmp; } 1204 } 1205 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1206 mat->rowindices = (int*)(mat->rowvalues + max); 1207 } 1208 1209 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1210 lrow = row - rstart; 1211 1212 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1213 if (!v) {pvA = 0; pvB = 0;} 1214 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1215 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1216 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1217 nztot = nzA + nzB; 1218 1219 cmap = mat->garray; 1220 if (v || idx) { 1221 if (nztot) { 1222 /* Sort by increasing column numbers, assuming A and B already sorted */ 1223 int imark = -1; 1224 if (v) { 1225 *v = v_p = mat->rowvalues; 1226 for (i=0; i<nzB; i++) { 1227 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1228 else break; 1229 } 1230 imark = i; 1231 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1232 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1233 } 1234 if (idx) { 1235 *idx = idx_p = mat->rowindices; 1236 if (imark > -1) { 1237 for (i=0; i<imark; i++) { 1238 idx_p[i] = cmap[cworkB[i]]; 1239 } 1240 } else { 1241 for (i=0; i<nzB; i++) { 1242 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1243 else break; 1244 } 1245 imark = i; 1246 } 1247 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1248 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1249 } 1250 } else { 1251 if (idx) *idx = 0; 1252 if (v) *v = 0; 1253 } 1254 } 1255 *nz = nztot; 1256 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1257 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1258 PetscFunctionReturn(0); 1259 } 1260 1261 #undef __FUNCT__ 1262 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1263 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1264 { 1265 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1266 1267 PetscFunctionBegin; 1268 if (aij->getrowactive == PETSC_FALSE) { 1269 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1270 } 1271 aij->getrowactive = PETSC_FALSE; 1272 PetscFunctionReturn(0); 1273 } 1274 1275 #undef __FUNCT__ 1276 #define __FUNCT__ "MatNorm_MPIAIJ" 1277 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1278 { 1279 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1280 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1281 int ierr,i,j,cstart = aij->cstart; 1282 PetscReal sum = 0.0; 1283 PetscScalar *v; 1284 1285 PetscFunctionBegin; 1286 if (aij->size == 1) { 1287 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1288 } else { 1289 if (type == NORM_FROBENIUS) { 1290 v = amat->a; 1291 for (i=0; i<amat->nz; i++) { 1292 #if defined(PETSC_USE_COMPLEX) 1293 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1294 #else 1295 sum += (*v)*(*v); v++; 1296 #endif 1297 } 1298 v = bmat->a; 1299 for (i=0; i<bmat->nz; i++) { 1300 #if defined(PETSC_USE_COMPLEX) 1301 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1302 #else 1303 sum += (*v)*(*v); v++; 1304 #endif 1305 } 1306 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1307 *norm = sqrt(*norm); 1308 } else if (type == NORM_1) { /* max column norm */ 1309 PetscReal *tmp,*tmp2; 1310 int 
*jj,*garray = aij->garray; 1311 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1312 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1313 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1314 *norm = 0.0; 1315 v = amat->a; jj = amat->j; 1316 for (j=0; j<amat->nz; j++) { 1317 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1318 } 1319 v = bmat->a; jj = bmat->j; 1320 for (j=0; j<bmat->nz; j++) { 1321 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1322 } 1323 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1324 for (j=0; j<mat->N; j++) { 1325 if (tmp2[j] > *norm) *norm = tmp2[j]; 1326 } 1327 ierr = PetscFree(tmp);CHKERRQ(ierr); 1328 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1329 } else if (type == NORM_INFINITY) { /* max row norm */ 1330 PetscReal ntemp = 0.0; 1331 for (j=0; j<aij->A->m; j++) { 1332 v = amat->a + amat->i[j]; 1333 sum = 0.0; 1334 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1335 sum += PetscAbsScalar(*v); v++; 1336 } 1337 v = bmat->a + bmat->i[j]; 1338 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1339 sum += PetscAbsScalar(*v); v++; 1340 } 1341 if (sum > ntemp) ntemp = sum; 1342 } 1343 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1344 } else { 1345 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1346 } 1347 } 1348 PetscFunctionReturn(0); 1349 } 1350 1351 #undef __FUNCT__ 1352 #define __FUNCT__ "MatTranspose_MPIAIJ" 1353 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1354 { 1355 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1356 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1357 int ierr; 1358 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1359 Mat B; 1360 PetscScalar *array; 1361 1362 PetscFunctionBegin; 1363 if (!matout && M != N) { 1364 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1365 } 1366 1367 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1368 1369 /* copy over the A part */ 1370 Aloc = (Mat_SeqAIJ*)a->A->data; 1371 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1372 row = a->rstart; 1373 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1374 for (i=0; i<m; i++) { 1375 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1376 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1377 } 1378 aj = Aloc->j; 1379 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1380 1381 /* copy over the B part */ 1382 Aloc = (Mat_SeqAIJ*)a->B->data; 1383 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1384 row = a->rstart; 1385 ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr); 1386 ct = cols; 1387 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1388 for (i=0; i<m; i++) { 1389 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1390 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1391 } 1392 ierr = PetscFree(ct);CHKERRQ(ierr); 1393 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1394 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1395 if (matout) { 1396 *matout = B; 1397 } else { 1398 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1399 } 1400 PetscFunctionReturn(0); 1401 } 1402 1403 #undef __FUNCT__ 1404 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1405 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1406 { 1407 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1408 Mat a = aij->A,b = aij->B; 1409 int ierr,s1,s2,s3; 1410 1411 PetscFunctionBegin; 1412 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1413 if 
(rr) { 1414 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1415 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1416 /* Overlap communication with computation. */ 1417 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1418 } 1419 if (ll) { 1420 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1421 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1422 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1423 } 1424 /* scale the diagonal block */ 1425 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1426 1427 if (rr) { 1428 /* Do a scatter end and then right scale the off-diagonal block */ 1429 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1430 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1431 } 1432 1433 PetscFunctionReturn(0); 1434 } 1435 1436 1437 #undef __FUNCT__ 1438 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1439 int MatPrintHelp_MPIAIJ(Mat A) 1440 { 1441 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1442 int ierr; 1443 1444 PetscFunctionBegin; 1445 if (!a->rank) { 1446 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1447 } 1448 PetscFunctionReturn(0); 1449 } 1450 1451 #undef __FUNCT__ 1452 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1453 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1454 { 1455 PetscFunctionBegin; 1456 *bs = 1; 1457 PetscFunctionReturn(0); 1458 } 1459 #undef __FUNCT__ 1460 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1461 int MatSetUnfactored_MPIAIJ(Mat A) 1462 { 1463 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1464 int ierr; 1465 1466 PetscFunctionBegin; 1467 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1468 PetscFunctionReturn(0); 1469 } 1470 1471 #undef __FUNCT__ 1472 #define __FUNCT__ "MatEqual_MPIAIJ" 1473 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1474 { 1475 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1476 Mat a,b,c,d; 1477 PetscTruth flg; 1478 int ierr; 1479 1480 PetscFunctionBegin; 1481 a = matA->A; b = matA->B; 1482 c = matB->A; d = matB->B; 1483 1484 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1485 if (flg == PETSC_TRUE) { 1486 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1487 } 1488 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1489 PetscFunctionReturn(0); 1490 } 1491 1492 #undef __FUNCT__ 1493 #define __FUNCT__ "MatCopy_MPIAIJ" 1494 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1495 { 1496 int ierr; 1497 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1498 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1499 1500 PetscFunctionBegin; 1501 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1502 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1503 /* because of the column compression in the off-processor part of the matrix a->B, 1504 the number of columns in a->B and b->B may be different, hence we cannot call 1505 the MatCopy() directly on the two parts. 
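       (Each process stores its off-diagonal block with only the nonzero
       off-process columns kept, renumbered 0,1,... through garray, so two
       matrices with different off-process couplings have B blocks of
       different widths even when their global sizes agree.)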
If need be, we can provide a more 1506 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1507 then copying the submatrices */ 1508 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1509 } else { 1510 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1511 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1512 } 1513 PetscFunctionReturn(0); 1514 } 1515 1516 #undef __FUNCT__ 1517 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1518 int MatSetUpPreallocation_MPIAIJ(Mat A) 1519 { 1520 int ierr; 1521 1522 PetscFunctionBegin; 1523 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1524 PetscFunctionReturn(0); 1525 } 1526 1527 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1528 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS [],int); 1529 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1530 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,const IS[],const IS[],MatReuse,Mat *[]); 1531 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1532 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1533 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*); 1534 #endif 1535 1536 #include "petscblaslapack.h" 1537 extern int MatAXPY_SeqAIJ(const PetscScalar[],Mat,Mat,MatStructure); 1538 #undef __FUNCT__ 1539 #define __FUNCT__ "MatAXPY_MPIAIJ" 1540 int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1541 { 1542 int ierr,one=1,i; 1543 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1544 Mat_SeqAIJ *x,*y; 1545 1546 PetscFunctionBegin; 1547 if (str == SAME_NONZERO_PATTERN) { 1548 x = (Mat_SeqAIJ *)xx->A->data; 1549 y = (Mat_SeqAIJ *)yy->A->data; 1550 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1551 x = (Mat_SeqAIJ *)xx->B->data; 1552 y = (Mat_SeqAIJ *)yy->B->data; 1553 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1554 } else if (str == SUBSET_NONZERO_PATTERN) { 1555 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1556 1557 x = (Mat_SeqAIJ *)xx->B->data; 1558 y = (Mat_SeqAIJ *)yy->B->data; 1559 if (y->xtoy && y->XtoY != xx->B) { 1560 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1561 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1562 } 1563 if (!y->xtoy) { /* get xtoy */ 1564 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1565 y->XtoY = xx->B; 1566 } 1567 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1568 } else { 1569 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1570 } 1571 PetscFunctionReturn(0); 1572 } 1573 1574 /* -------------------------------------------------------------------*/ 1575 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1576 MatGetRow_MPIAIJ, 1577 MatRestoreRow_MPIAIJ, 1578 MatMult_MPIAIJ, 1579 MatMultAdd_MPIAIJ, 1580 MatMultTranspose_MPIAIJ, 1581 MatMultTransposeAdd_MPIAIJ, 1582 0, 1583 0, 1584 0, 1585 0, 1586 0, 1587 0, 1588 MatRelax_MPIAIJ, 1589 MatTranspose_MPIAIJ, 1590 MatGetInfo_MPIAIJ, 1591 MatEqual_MPIAIJ, 1592 MatGetDiagonal_MPIAIJ, 1593 MatDiagonalScale_MPIAIJ, 1594 MatNorm_MPIAIJ, 1595 MatAssemblyBegin_MPIAIJ, 1596 MatAssemblyEnd_MPIAIJ, 1597 0, 1598 MatSetOption_MPIAIJ, 1599 MatZeroEntries_MPIAIJ, 1600 MatZeroRows_MPIAIJ, 1601 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1602 MatLUFactorSymbolic_MPIAIJ_TFS, 1603 #else 1604 0, 1605 #endif 1606 0, 1607 0, 1608 0, 1609 MatSetUpPreallocation_MPIAIJ, 1610 0, 1611 0, 1612 0, 1613 0, 1614 MatDuplicate_MPIAIJ, 1615 0, 1616 0, 1617 0, 1618 0, 1619 MatAXPY_MPIAIJ, 
1620 MatGetSubMatrices_MPIAIJ, 1621 MatIncreaseOverlap_MPIAIJ, 1622 MatGetValues_MPIAIJ, 1623 MatCopy_MPIAIJ, 1624 MatPrintHelp_MPIAIJ, 1625 MatScale_MPIAIJ, 1626 0, 1627 0, 1628 0, 1629 MatGetBlockSize_MPIAIJ, 1630 0, 1631 0, 1632 0, 1633 0, 1634 MatFDColoringCreate_MPIAIJ, 1635 0, 1636 MatSetUnfactored_MPIAIJ, 1637 0, 1638 0, 1639 MatGetSubMatrix_MPIAIJ, 1640 MatDestroy_MPIAIJ, 1641 MatView_MPIAIJ, 1642 MatGetPetscMaps_Petsc, 1643 0, 1644 0, 1645 0, 1646 0, 1647 0, 1648 0, 1649 0, 1650 0, 1651 MatSetColoring_MPIAIJ, 1652 MatSetValuesAdic_MPIAIJ, 1653 MatSetValuesAdifor_MPIAIJ 1654 }; 1655 1656 /* ----------------------------------------------------------------------------------------*/ 1657 1658 EXTERN_C_BEGIN 1659 #undef __FUNCT__ 1660 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1661 int MatStoreValues_MPIAIJ(Mat mat) 1662 { 1663 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1664 int ierr; 1665 1666 PetscFunctionBegin; 1667 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1668 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1669 PetscFunctionReturn(0); 1670 } 1671 EXTERN_C_END 1672 1673 EXTERN_C_BEGIN 1674 #undef __FUNCT__ 1675 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1676 int MatRetrieveValues_MPIAIJ(Mat mat) 1677 { 1678 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1679 int ierr; 1680 1681 PetscFunctionBegin; 1682 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1683 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1684 PetscFunctionReturn(0); 1685 } 1686 EXTERN_C_END 1687 1688 #include "petscpc.h" 1689 EXTERN_C_BEGIN 1690 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1691 EXTERN int MatDiagonalScaleLocal_MPIAIJ(Mat,Vec); 1692 EXTERN_C_END 1693 1694 EXTERN_C_BEGIN 1695 #undef __FUNCT__ 1696 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1697 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 1698 { 1699 Mat_MPIAIJ *b; 1700 int ierr,i; 1701 1702 PetscFunctionBegin; 1703 B->preallocated = PETSC_TRUE; 1704 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1705 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1706 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1707 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1708 if (d_nnz) { 1709 for (i=0; i<B->m; i++) { 1710 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 1711 } 1712 } 1713 if (o_nnz) { 1714 for (i=0; i<B->m; i++) { 1715 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 1716 } 1717 } 1718 b = (Mat_MPIAIJ*)B->data; 1719 1720 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1721 PetscLogObjectParent(B,b->A); 1722 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1723 PetscLogObjectParent(B,b->B); 1724 1725 PetscFunctionReturn(0); 1726 } 1727 EXTERN_C_END 1728 1729 EXTERN_C_BEGIN 1730 #undef __FUNCT__ 1731 #define __FUNCT__ "MatCreate_MPIAIJ" 1732 int MatCreate_MPIAIJ(Mat B) 1733 { 1734 Mat_MPIAIJ *b; 1735 int ierr,i,size; 1736 1737 PetscFunctionBegin; 1738 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1739 1740 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1741 B->data = (void*)b; 1742 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1743 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1744 B->factor = 0; 1745 
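  /*
     The remainder of this constructor splits the row and column ownership across the
     communicator, builds the rowners/cowners prefix-sum tables (for example, local sizes
     3,3,2 on three processes give {0,3,6,8}), creates the stash that holds off-process
     entries until assembly, and composes the MPIAIJ-specific operations such as
     MatMPIAIJSetPreallocation_MPIAIJ().
  */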
B->assembled = PETSC_FALSE; 1746 B->mapping = 0; 1747 1748 B->insertmode = NOT_SET_VALUES; 1749 b->size = size; 1750 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1751 1752 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1753 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1754 1755 /* the information in the maps duplicates the information computed below, eventually 1756 we should remove the duplicate information that is not contained in the maps */ 1757 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1758 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1759 1760 /* build local table of row and column ownerships */ 1761 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1762 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1763 b->cowners = b->rowners + b->size + 2; 1764 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1765 b->rowners[0] = 0; 1766 for (i=2; i<=b->size; i++) { 1767 b->rowners[i] += b->rowners[i-1]; 1768 } 1769 b->rstart = b->rowners[b->rank]; 1770 b->rend = b->rowners[b->rank+1]; 1771 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1772 b->cowners[0] = 0; 1773 for (i=2; i<=b->size; i++) { 1774 b->cowners[i] += b->cowners[i-1]; 1775 } 1776 b->cstart = b->cowners[b->rank]; 1777 b->cend = b->cowners[b->rank+1]; 1778 1779 /* build cache for off array entries formed */ 1780 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1781 b->donotstash = PETSC_FALSE; 1782 b->colmap = 0; 1783 b->garray = 0; 1784 b->roworiented = PETSC_TRUE; 1785 1786 /* stuff used for matrix vector multiply */ 1787 b->lvec = PETSC_NULL; 1788 b->Mvctx = PETSC_NULL; 1789 1790 /* stuff for MatGetRow() */ 1791 b->rowindices = 0; 1792 b->rowvalues = 0; 1793 b->getrowactive = PETSC_FALSE; 1794 1795 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1796 "MatStoreValues_MPIAIJ", 1797 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1798 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1799 "MatRetrieveValues_MPIAIJ", 1800 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1801 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1802 "MatGetDiagonalBlock_MPIAIJ", 1803 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1804 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C", 1805 "MatIsSymmetric_MPIAIJ", 1806 MatIsSymmetric_MPIAIJ); CHKERRQ(ierr); 1807 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 1808 "MatMPIAIJSetPreallocation_MPIAIJ", 1809 MatMPIAIJSetPreallocation_MPIAIJ); CHKERRQ(ierr); 1810 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 1811 "MatDiagonalScaleLocal_MPIAIJ", 1812 MatDiagonalScaleLocal_MPIAIJ); CHKERRQ(ierr); 1813 PetscFunctionReturn(0); 1814 } 1815 EXTERN_C_END 1816 1817 #undef __FUNCT__ 1818 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1819 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1820 { 1821 Mat mat; 1822 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1823 int ierr; 1824 1825 PetscFunctionBegin; 1826 *newmat = 0; 1827 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1828 ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1829 a = (Mat_MPIAIJ*)mat->data; 1830 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1831 
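  /*
     The duplicate is built by copying the scalar fields and ownership tables of the old
     matrix, then duplicating its diagonal (A) and off-diagonal (B) sequential blocks, the
     local work vector lvec, and the scatter context Mvctx used for matrix-vector products.
  */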
mat->factor = matin->factor; 1832 mat->assembled = PETSC_TRUE; 1833 mat->insertmode = NOT_SET_VALUES; 1834 mat->preallocated = PETSC_TRUE; 1835 1836 a->rstart = oldmat->rstart; 1837 a->rend = oldmat->rend; 1838 a->cstart = oldmat->cstart; 1839 a->cend = oldmat->cend; 1840 a->size = oldmat->size; 1841 a->rank = oldmat->rank; 1842 a->donotstash = oldmat->donotstash; 1843 a->roworiented = oldmat->roworiented; 1844 a->rowindices = 0; 1845 a->rowvalues = 0; 1846 a->getrowactive = PETSC_FALSE; 1847 1848 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1849 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1850 if (oldmat->colmap) { 1851 #if defined (PETSC_USE_CTABLE) 1852 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1853 #else 1854 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1855 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1856 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1857 #endif 1858 } else a->colmap = 0; 1859 if (oldmat->garray) { 1860 int len; 1861 len = oldmat->B->n; 1862 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1863 PetscLogObjectMemory(mat,len*sizeof(int)); 1864 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1865 } else a->garray = 0; 1866 1867 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1868 PetscLogObjectParent(mat,a->lvec); 1869 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1870 PetscLogObjectParent(mat,a->Mvctx); 1871 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1872 PetscLogObjectParent(mat,a->A); 1873 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1874 PetscLogObjectParent(mat,a->B); 1875 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1876 *newmat = mat; 1877 PetscFunctionReturn(0); 1878 } 1879 1880 #include "petscsys.h" 1881 1882 EXTERN_C_BEGIN 1883 #undef __FUNCT__ 1884 #define __FUNCT__ "MatLoad_MPIAIJ" 1885 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat) 1886 { 1887 Mat A; 1888 PetscScalar *vals,*svals; 1889 MPI_Comm comm = ((PetscObject)viewer)->comm; 1890 MPI_Status status; 1891 int i,nz,ierr,j,rstart,rend,fd; 1892 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1893 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1894 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1895 1896 PetscFunctionBegin; 1897 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1898 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1899 if (!rank) { 1900 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1901 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1902 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1903 if (header[3] < 0) { 1904 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1905 } 1906 } 1907 1908 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1909 M = header[1]; N = header[2]; 1910 /* determine ownership of all rows */ 1911 m = M/size + ((M % size) > rank); 1912 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1913 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1914 rowners[0] = 0; 1915 for (i=2; i<=size; i++) { 1916 rowners[i] += rowners[i-1]; 1917 } 1918 rstart = rowners[rank]; 1919 rend = rowners[rank+1]; 1920 1921 /* distribute row lengths to all processors */ 
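  /*
     Process 0 reads all M row lengths from the binary file and scatters to each process
     the rend-rstart entries that it owns; the send arguments of MPI_Scatterv are ignored
     on the remaining processes, which only receive into ourlens.
  */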
1922 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1923 offlens = ourlens + (rend-rstart); 1924 if (!rank) { 1925 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1926 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1927 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1928 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1929 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1930 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1931 } else { 1932 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1933 } 1934 1935 if (!rank) { 1936 /* calculate the number of nonzeros on each processor */ 1937 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1938 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1939 for (i=0; i<size; i++) { 1940 for (j=rowners[i]; j< rowners[i+1]; j++) { 1941 procsnz[i] += rowlengths[j]; 1942 } 1943 } 1944 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1945 1946 /* determine max buffer needed and allocate it */ 1947 maxnz = 0; 1948 for (i=0; i<size; i++) { 1949 maxnz = PetscMax(maxnz,procsnz[i]); 1950 } 1951 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1952 1953 /* read in my part of the matrix column indices */ 1954 nz = procsnz[0]; 1955 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1956 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1957 1958 /* read in every one elses and ship off */ 1959 for (i=1; i<size; i++) { 1960 nz = procsnz[i]; 1961 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1962 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1963 } 1964 ierr = PetscFree(cols);CHKERRQ(ierr); 1965 } else { 1966 /* determine buffer space needed for message */ 1967 nz = 0; 1968 for (i=0; i<m; i++) { 1969 nz += ourlens[i]; 1970 } 1971 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 1972 1973 /* receive message of column indices*/ 1974 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1975 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1976 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1977 } 1978 1979 /* determine column ownership if matrix is not square */ 1980 if (N != M) { 1981 n = N/size + ((N % size) > rank); 1982 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1983 cstart = cend - n; 1984 } else { 1985 cstart = rstart; 1986 cend = rend; 1987 n = cend - cstart; 1988 } 1989 1990 /* loop over local rows, determining number of off diagonal entries */ 1991 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 1992 jj = 0; 1993 for (i=0; i<m; i++) { 1994 for (j=0; j<ourlens[i]; j++) { 1995 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1996 jj++; 1997 } 1998 } 1999 2000 /* create our matrix */ 2001 for (i=0; i<m; i++) { 2002 ourlens[i] -= offlens[i]; 2003 } 2004 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2005 ierr = MatSetType(A,type);CHKERRQ(ierr); 2006 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2007 2008 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2009 for (i=0; i<m; i++) { 2010 ourlens[i] += offlens[i]; 2011 } 2012 2013 if (!rank) { 2014 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2015 2016 /* read in my part of the matrix numerical values */ 2017 nz = procsnz[0]; 2018 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2019 
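    /*
       Process 0 first inserts its own rows below, then reads each remaining process's
       block of values from the file and ships it with MPI_Send; every other process
       receives its block into vals in the else branch and inserts it with the same
       row loop.
    */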
2020 /* insert into matrix */ 2021 jj = rstart; 2022 smycols = mycols; 2023 svals = vals; 2024 for (i=0; i<m; i++) { 2025 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2026 smycols += ourlens[i]; 2027 svals += ourlens[i]; 2028 jj++; 2029 } 2030 2031 /* read in other processors and ship out */ 2032 for (i=1; i<size; i++) { 2033 nz = procsnz[i]; 2034 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2035 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2036 } 2037 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2038 } else { 2039 /* receive numeric values */ 2040 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2041 2042 /* receive message of values*/ 2043 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2044 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2045 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2046 2047 /* insert into matrix */ 2048 jj = rstart; 2049 smycols = mycols; 2050 svals = vals; 2051 for (i=0; i<m; i++) { 2052 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2053 smycols += ourlens[i]; 2054 svals += ourlens[i]; 2055 jj++; 2056 } 2057 } 2058 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2059 ierr = PetscFree(vals);CHKERRQ(ierr); 2060 ierr = PetscFree(mycols);CHKERRQ(ierr); 2061 ierr = PetscFree(rowners);CHKERRQ(ierr); 2062 2063 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2064 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2065 *newmat = A; 2066 PetscFunctionReturn(0); 2067 } 2068 EXTERN_C_END 2069 2070 #undef __FUNCT__ 2071 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2072 /* 2073 Not great since it makes two copies of the submatrix, first an SeqAIJ 2074 in local and then by concatenating the local matrices the end result. 
2075 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2076 */ 2077 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2078 { 2079 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2080 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2081 Mat *local,M,Mreuse; 2082 PetscScalar *vwork,*aa; 2083 MPI_Comm comm = mat->comm; 2084 Mat_SeqAIJ *aij; 2085 2086 2087 PetscFunctionBegin; 2088 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2089 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2090 2091 if (call == MAT_REUSE_MATRIX) { 2092 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2093 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 2094 local = &Mreuse; 2095 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2096 } else { 2097 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2098 Mreuse = *local; 2099 ierr = PetscFree(local);CHKERRQ(ierr); 2100 } 2101 2102 /* 2103 m - number of local rows 2104 n - number of columns (same on all processors) 2105 rstart - first row in new global matrix generated 2106 */ 2107 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2108 if (call == MAT_INITIAL_MATRIX) { 2109 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2110 ii = aij->i; 2111 jj = aij->j; 2112 2113 /* 2114 Determine the number of non-zeros in the diagonal and off-diagonal 2115 portions of the matrix in order to do correct preallocation 2116 */ 2117 2118 /* first get start and end of "diagonal" columns */ 2119 if (csize == PETSC_DECIDE) { 2120 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2121 if (mglobal == n) { /* square matrix */ 2122 nlocal = m; 2123 } else { 2124 nlocal = n/size + ((n % size) > rank); 2125 } 2126 } else { 2127 nlocal = csize; 2128 } 2129 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2130 rstart = rend - nlocal; 2131 if (rank == size - 1 && rend != n) { 2132 SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n); 2133 } 2134 2135 /* next, compute all the lengths */ 2136 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2137 olens = dlens + m; 2138 for (i=0; i<m; i++) { 2139 jend = ii[i+1] - ii[i]; 2140 olen = 0; 2141 dlen = 0; 2142 for (j=0; j<jend; j++) { 2143 if (*jj < rstart || *jj >= rend) olen++; 2144 else dlen++; 2145 jj++; 2146 } 2147 olens[i] = olen; 2148 dlens[i] = dlen; 2149 } 2150 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2151 ierr = PetscFree(dlens);CHKERRQ(ierr); 2152 } else { 2153 int ml,nl; 2154 2155 M = *newmat; 2156 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2157 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2158 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2159 /* 2160 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2161 rather than the slower MatSetValues(). 
2162 */ 2163 M->was_assembled = PETSC_TRUE; 2164 M->assembled = PETSC_FALSE; 2165 } 2166 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2167 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2168 ii = aij->i; 2169 jj = aij->j; 2170 aa = aij->a; 2171 for (i=0; i<m; i++) { 2172 row = rstart + i; 2173 nz = ii[i+1] - ii[i]; 2174 cwork = jj; jj += nz; 2175 vwork = aa; aa += nz; 2176 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2177 } 2178 2179 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2180 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2181 *newmat = M; 2182 2183 /* save submatrix used in processor for next request */ 2184 if (call == MAT_INITIAL_MATRIX) { 2185 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2186 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2187 } 2188 2189 PetscFunctionReturn(0); 2190 } 2191 2192 #undef __FUNCT__ 2193 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2194 /*@C 2195 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2196 (the default parallel PETSc format). For good matrix assembly performance 2197 the user should preallocate the matrix storage by setting the parameters 2198 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2199 performance can be increased by more than a factor of 50. 2200 2201 Collective on MPI_Comm 2202 2203 Input Parameters: 2204 + A - the matrix 2205 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2206 (same value is used for all local rows) 2207 . d_nnz - array containing the number of nonzeros in the various rows of the 2208 DIAGONAL portion of the local submatrix (possibly different for each row) 2209 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2210 The size of this array is equal to the number of local rows, i.e. 'm'. 2211 You must leave room for the diagonal entry even if it is zero. 2212 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2213 submatrix (same value is used for all local rows). 2214 - o_nnz - array containing the number of nonzeros in the various rows of the 2215 OFF-DIAGONAL portion of the local submatrix (possibly different for 2216 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2217 structure. The size of this array is equal to the number 2218 of local rows, i.e. 'm'. 2219 2220 The AIJ format (also called the Yale sparse matrix format or 2221 compressed row storage), is fully compatible with standard Fortran 77 2222 storage. That is, the stored row and column indices can begin at 2223 either one (as in Fortran) or zero. See the users manual for details. 2224 2225 The user MUST specify either the local or global matrix dimensions 2226 (possibly both). 2227 2228 The parallel matrix is partitioned such that the first m0 rows belong to 2229 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2230 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 2231 2232 The DIAGONAL portion of the local submatrix of a processor can be defined 2233 as the submatrix which is obtained by extracting the part corresponding 2234 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2235 first row that belongs to the processor, and r2 is the last row belonging 2236 to this processor. This is a square mxm matrix. The remaining portion 2237 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2238 2239 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2240 2241 By default, this format uses inodes (identical nodes) when possible. 2242 We search for consecutive rows with the same nonzero structure, thereby 2243 reusing matrix information to achieve increased efficiency. 2244 2245 Options Database Keys: 2246 + -mat_aij_no_inode - Do not use inodes 2247 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2248 - -mat_aij_oneindex - Internally use indexing starting at 1 2249 rather than 0. Note that when calling MatSetValues(), 2250 the user still MUST index entries starting at 0! 2251 2252 Example usage: 2253 2254 Consider the following 8x8 matrix with 34 non-zero values, that is 2255 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2256 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2257 as follows: 2258 2259 .vb 2260 1 2 0 | 0 3 0 | 0 4 2261 Proc0 0 5 6 | 7 0 0 | 8 0 2262 9 0 10 | 11 0 0 | 12 0 2263 ------------------------------------- 2264 13 0 14 | 15 16 17 | 0 0 2265 Proc1 0 18 0 | 19 20 21 | 0 0 2266 0 0 0 | 22 23 0 | 24 0 2267 ------------------------------------- 2268 Proc2 25 26 27 | 0 0 28 | 29 0 2269 30 0 0 | 31 32 33 | 0 34 2270 .ve 2271 2272 This can be represented as a collection of submatrices as: 2273 2274 .vb 2275 A B C 2276 D E F 2277 G H I 2278 .ve 2279 2280 Here the submatrices A,B,C are owned by proc0, D,E,F are 2281 owned by proc1, and G,H,I are owned by proc2. 2282 2283 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2284 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2285 The 'M','N' parameters are 8,8, and have the same values on all procs. 2286 2287 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2288 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2289 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2290 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2291 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 2292 matrix, and [DF] as another SeqAIJ matrix. 2293 2294 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2295 allocated for every row of the local diagonal submatrix, and o_nz 2296 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2297 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2298 local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 2299 In this case, the values of d_nz,o_nz are: 2300 .vb 2301 proc0 : d_nz = 2, o_nz = 2 2302 proc1 : d_nz = 3, o_nz = 2 2303 proc2 : d_nz = 1, o_nz = 4 2304 .ve 2305 We are allocating m*(d_nz+o_nz) storage locations on every proc. This 2306 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 2307 for proc2, i.e. we are using 12+15+10=37 storage locations to store 2308 34 values. 2309 2310 When the d_nnz, o_nnz parameters are specified, the storage is specified 2311 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2312 In the above case the values for d_nnz,o_nnz are: 2313 .vb 2314 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2315 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2316 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2317 .ve 2318 Here the space allocated is the sum of all of the above values, i.e. 34, and 2319 hence the pre-allocation is perfect.
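   As an illustration only (the variables comm, A, and ierr are assumed to have been
   declared by the caller, and each process makes the corresponding call with its own
   local sizes and arrays), the calls made on proc1 of the example above could be:
.vb
   int d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};
   ierr = MatCreate(comm,3,3,8,8,&A);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve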
2320 2321 Level: intermediate 2322 2323 .keywords: matrix, aij, compressed row, sparse, parallel 2324 2325 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2326 @*/ 2327 int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 2328 { 2329 int ierr,(*f)(Mat,int,const int[],int,const int[]); 2330 2331 PetscFunctionBegin; 2332 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2333 if (f) { 2334 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2335 } 2336 PetscFunctionReturn(0); 2337 } 2338 2339 #undef __FUNCT__ 2340 #define __FUNCT__ "MatCreateMPIAIJ" 2341 /*@C 2342 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2343 (the default parallel PETSc format). For good matrix assembly performance 2344 the user should preallocate the matrix storage by setting the parameters 2345 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2346 performance can be increased by more than a factor of 50. 2347 2348 Collective on MPI_Comm 2349 2350 Input Parameters: 2351 + comm - MPI communicator 2352 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given) 2353 This value should be the same as the local size used in creating the 2354 y vector for the matrix-vector product y = Ax. 2355 . n - This value should be the same as the local size used in creating the 2356 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have 2357 it calculated if N is given). For square matrices n is almost always m. 2358 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 2359 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 2360 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2361 (same value is used for all local rows) 2362 . d_nnz - array containing the number of nonzeros in the various rows of the 2363 DIAGONAL portion of the local submatrix (possibly different for each row) 2364 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2365 The size of this array is equal to the number of local rows, i.e. 'm'. 2366 You must leave room for the diagonal entry even if it is zero. 2367 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2368 submatrix (same value is used for all local rows). 2369 - o_nnz - array containing the number of nonzeros in the various rows of the 2370 OFF-DIAGONAL portion of the local submatrix (possibly different for 2371 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2372 structure. The size of this array is equal to the number 2373 of local rows, i.e. 'm'. 2374 2375 Output Parameter: 2376 . A - the matrix 2377 2378 Notes: 2379 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 2380 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2381 storage requirements for this matrix. 2382 2383 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2384 processor then it must be used on all processors that share the object for 2385 that argument. 2386 2387 The AIJ format (also called the Yale sparse matrix format or 2388 compressed row storage), is fully compatible with standard Fortran 77 2389 storage. That is, the stored row and column indices can begin at 2390 either one (as in Fortran) or zero. See the users manual for details. 2391 2392 The user MUST specify either the local or global matrix dimensions 2393 (possibly both).
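   For instance, a sketch of a typical call (d_nnz and o_nnz are illustrative names for
   the per-row nonzero counts discussed in the example below) is:
.vb
   ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve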
2394 2395 The parallel matrix is partitioned such that the first m0 rows belong to 2396 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2397 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 2398 2399 The DIAGONAL portion of the local submatrix of a processor can be defined 2400 as the submatrix which is obtained by extracting the part corresponding 2401 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2402 first row that belongs to the processor, and r2 is the last row belonging 2403 to this processor. This is a square mxm matrix. The remaining portion 2404 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2405 2406 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2407 2408 When calling this routine with a single process communicator, a matrix of 2409 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2410 type of communicator, use the construction mechanism: 2411 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...); 2412 2413 By default, this format uses inodes (identical nodes) when possible. 2414 We search for consecutive rows with the same nonzero structure, thereby 2415 reusing matrix information to achieve increased efficiency. 2416 2417 Options Database Keys: 2418 + -mat_aij_no_inode - Do not use inodes 2419 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2420 - -mat_aij_oneindex - Internally use indexing starting at 1 2421 rather than 0. Note that when calling MatSetValues(), 2422 the user still MUST index entries starting at 0! 2423 2424 2425 Example usage: 2426 2427 Consider the following 8x8 matrix with 34 non-zero values, that is 2428 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2429 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2430 as follows: 2431 2432 .vb 2433 1 2 0 | 0 3 0 | 0 4 2434 Proc0 0 5 6 | 7 0 0 | 8 0 2435 9 0 10 | 11 0 0 | 12 0 2436 ------------------------------------- 2437 13 0 14 | 15 16 17 | 0 0 2438 Proc1 0 18 0 | 19 20 21 | 0 0 2439 0 0 0 | 22 23 0 | 24 0 2440 ------------------------------------- 2441 Proc2 25 26 27 | 0 0 28 | 29 0 2442 30 0 0 | 31 32 33 | 0 34 2443 .ve 2444 2445 This can be represented as a collection of submatrices as: 2446 2447 .vb 2448 A B C 2449 D E F 2450 G H I 2451 .ve 2452 2453 Here the submatrices A,B,C are owned by proc0, D,E,F are 2454 owned by proc1, and G,H,I are owned by proc2. 2455 2456 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2457 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2458 The 'M','N' parameters are 8,8, and have the same values on all procs. 2459 2460 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2461 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2462 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2463 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2464 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 2465 matrix, and [DF] as another SeqAIJ matrix. 2466 2467 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2468 allocated for every row of the local diagonal submatrix, and o_nz 2469 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2470 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2471 local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2472 In this case, the values of d_nz,o_nz are: 2473 .vb 2474 proc0 : d_nz = 2, o_nz = 2 2475 proc1 : d_nz = 3, o_nz = 2 2476 proc2 : d_nz = 1, o_nz = 4 2477 .ve 2478 We are allocating m*(d_nz+o_nz) storage locations on every proc. This 2479 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 2480 for proc2, i.e. we are using 12+15+10=37 storage locations to store 2481 34 values. 2482 2483 When the d_nnz, o_nnz parameters are specified, the storage is specified 2484 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2485 In the above case the values for d_nnz,o_nnz are: 2486 .vb 2487 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2488 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2489 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2490 .ve 2491 Here the space allocated is the sum of all of the above values, i.e. 34, and 2492 hence the pre-allocation is perfect. 2493 2494 Level: intermediate 2495 2496 .keywords: matrix, aij, compressed row, sparse, parallel 2497 2498 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2499 @*/ 2500 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 2501 { 2502 int ierr,size; 2503 2504 PetscFunctionBegin; 2505 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2506 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2507 if (size > 1) { 2508 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2509 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2510 } else { 2511 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2512 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2513 } 2514 PetscFunctionReturn(0); 2515 } 2516 2517 #undef __FUNCT__ 2518 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2519 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[]) 2520 { 2521 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2522 PetscFunctionBegin; 2523 *Ad = a->A; 2524 *Ao = a->B; 2525 *colmap = a->garray; 2526 PetscFunctionReturn(0); 2527 } 2528 2529 #undef __FUNCT__ 2530 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2531 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2532 { 2533 int ierr,i; 2534 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2535 2536 PetscFunctionBegin; 2537 if (coloring->ctype == IS_COLORING_LOCAL) { 2538 ISColoringValue *allcolors,*colors; 2539 ISColoring ocoloring; 2540 2541 /* set coloring for diagonal portion */ 2542 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2543 2544 /* set coloring for off-diagonal portion */ 2545 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2546 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2547 for (i=0; i<a->B->n; i++) { 2548 colors[i] = allcolors[a->garray[i]]; 2549 } 2550 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2551 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2552 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2553 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2554 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2555 ISColoringValue *colors; 2556 int *larray; 2557 ISColoring ocoloring; 2558 2559 /* set coloring for diagonal portion */ 2560 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2561 for (i=0; i<a->A->n; i++) { 2562 larray[i] = i + a->cstart; 2563 } 2564 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2565 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2566 for
(i=0; i<a->A->n; i++) { 2567 colors[i] = coloring->colors[larray[i]]; 2568 } 2569 ierr = PetscFree(larray);CHKERRQ(ierr); 2570 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2571 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2572 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2573 2574 /* set coloring for off-diagonal portion */ 2575 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2576 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2577 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2578 for (i=0; i<a->B->n; i++) { 2579 colors[i] = coloring->colors[larray[i]]; 2580 } 2581 ierr = PetscFree(larray);CHKERRQ(ierr); 2582 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2583 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2584 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2585 } else { 2586 SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype); 2587 } 2588 2589 PetscFunctionReturn(0); 2590 } 2591 2592 #undef __FUNCT__ 2593 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2594 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2595 { 2596 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2597 int ierr; 2598 2599 PetscFunctionBegin; 2600 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2601 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2602 PetscFunctionReturn(0); 2603 } 2604 2605 #undef __FUNCT__ 2606 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2607 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2608 { 2609 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2610 int ierr; 2611 2612 PetscFunctionBegin; 2613 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2614 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2615 PetscFunctionReturn(0); 2616 } 2617 2618 #undef __FUNCT__ 2619 #define __FUNCT__ "MatMerge" 2620 /*@C 2621 MatMerge - Creates a single large parallel PETSc matrix by concatenating the sequential 2622 matrices from each processor 2623 2624 Collective on MPI_Comm 2625 2626 Input Parameters: 2627 + comm - the communicator the parallel matrix will live on 2628 - inmat - the input sequential matrix on this process 2629 2630 Output Parameter: 2631 . outmat - the parallel matrix generated 2632 2633 Level: advanced 2634 2635 Notes: The number of columns of the input matrix on EACH of the separate processes 2636 MUST be the same.
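   A sketch of typical use, assuming every process has assembled its own sequential
   matrix Aloc with the same number of columns (Aloc and Apar are illustrative names;
   note that the input matrix is destroyed by this routine):
.vb
   ierr = MatMerge(PETSC_COMM_WORLD,Aloc,&Apar);CHKERRQ(ierr);
.ve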
2637 2638 @*/ 2639 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat) 2640 { 2641 int ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz; 2642 PetscScalar *values; 2643 PetscMap columnmap,rowmap; 2644 2645 PetscFunctionBegin; 2646 2647 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr); 2648 2649 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2650 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2651 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr); 2652 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2653 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2654 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2655 2656 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2657 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2658 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2659 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2660 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2661 2662 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2663 for (i=0;i<m;i++) { 2664 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2665 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2666 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2667 } 2668 ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr); 2669 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2670 2671 for (i=0;i<m;i++) { 2672 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2673 I = i + rstart; 2674 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2675 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2676 } 2677 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2678 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2679 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2680 2681 PetscFunctionReturn(0); 2682 } 2683 2684 #undef __FUNCT__ 2685 #define __FUNCT__ "MatFileSplit" 2686 int MatFileSplit(Mat A,char *outfile) 2687 { 2688 int ierr,rank,len,m,N,i,rstart,*indx,nnz; 2689 PetscViewer out; 2690 char *name; 2691 Mat B; 2692 PetscScalar *values; 2693 2694 PetscFunctionBegin; 2695 2696 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2697 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2698 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr); 2699 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2700 for (i=0;i<m;i++) { 2701 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2702 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2703 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2704 } 2705 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2706 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2707 2708 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2709 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2710 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2711 sprintf(name,"%s.%d",outfile,rank); 2712 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr); 2713 ierr = PetscFree(name); 2714 ierr = MatView(B,out);CHKERRQ(ierr); 2715 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2716 ierr = MatDestroy(B);CHKERRQ(ierr); 2717 PetscFunctionReturn(0); 2718 } 2719
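/*
   MatFileSplit() above writes, for each process, its local rows of A as a sequential
   matrix to a binary file named <outfile>.<rank>. A sketch of use (the matrix A and the
   file name are illustrative):

      ierr = MatFileSplit(A,"matrix.dat");CHKERRQ(ierr);    writes matrix.dat.0, matrix.dat.1, ...
*/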