1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/ 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/spops.h" 6 7 /* 8 Local utility routine that creates a mapping from the global column 9 number to the local number in the off-diagonal part of the local 10 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 11 a slightly higher hash table cost; without it it is not scalable (each processor 12 has an order N integer array but is fast to acess. 13 */ 14 #undef __FUNCT__ 15 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 16 int CreateColmap_MPIAIJ_Private(Mat mat) 17 { 18 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 19 int n = aij->B->n,i,ierr; 20 21 PetscFunctionBegin; 22 #if defined (PETSC_USE_CTABLE) 23 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 24 for (i=0; i<n; i++){ 25 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 26 } 27 #else 28 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 29 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 30 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 31 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 32 #endif 33 PetscFunctionReturn(0); 34 } 35 36 #define CHUNKSIZE 15 37 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 38 { \ 39 \ 40 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 41 rmax = aimax[row]; nrow = ailen[row]; \ 42 col1 = col - shift; \ 43 \ 44 low = 0; high = nrow; \ 45 while (high-low > 5) { \ 46 t = (low+high)/2; \ 47 if (rp[t] > col) high = t; \ 48 else low = t; \ 49 } \ 50 for (_i=low; _i<high; _i++) { \ 51 if (rp[_i] > col1) break; \ 52 if (rp[_i] == col1) { \ 53 if (addv == ADD_VALUES) ap[_i] += value; \ 54 else ap[_i] = value; \ 55 goto a_noinsert; \ 56 } \ 57 } \ 58 if (nonew == 1) goto a_noinsert; \ 59 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 60 if (nrow >= rmax) 
{ \ 61 /* there is no extra room in row, therefore enlarge */ \ 62 int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 63 PetscScalar *new_a; \ 64 \ 65 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 66 \ 67 /* malloc new storage space */ \ 68 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \ 69 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 70 new_j = (int*)(new_a + new_nz); \ 71 new_i = new_j + new_nz; \ 72 \ 73 /* copy over old data into new slots */ \ 74 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 75 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 76 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 77 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 78 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 79 len*sizeof(int));CHKERRQ(ierr); \ 80 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 81 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 82 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 83 /* free up old matrix storage */ \ 84 \ 85 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 86 if (!a->singlemalloc) { \ 87 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 88 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 89 } \ 90 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 91 a->singlemalloc = PETSC_TRUE; \ 92 \ 93 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 94 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 95 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 96 a->maxnz += CHUNKSIZE; \ 97 a->reallocs++; \ 98 } \ 99 N = nrow++ - 1; a->nz++; \ 100 /* shift up all the later entries in this row */ \ 101 for (ii=N; ii>=_i; ii--) { \ 102 rp[ii+1] = rp[ii]; \ 103 ap[ii+1] = ap[ii]; \ 104 } \ 105 rp[_i] = col1; \ 106 ap[_i] = value; \ 107 a_noinsert: ; \ 108 ailen[row] = nrow; \ 109 } 110 111 #define 
MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 112 { \ 113 \ 114 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 115 rmax = bimax[row]; nrow = bilen[row]; \ 116 col1 = col - shift; \ 117 \ 118 low = 0; high = nrow; \ 119 while (high-low > 5) { \ 120 t = (low+high)/2; \ 121 if (rp[t] > col) high = t; \ 122 else low = t; \ 123 } \ 124 for (_i=low; _i<high; _i++) { \ 125 if (rp[_i] > col1) break; \ 126 if (rp[_i] == col1) { \ 127 if (addv == ADD_VALUES) ap[_i] += value; \ 128 else ap[_i] = value; \ 129 goto b_noinsert; \ 130 } \ 131 } \ 132 if (nonew == 1) goto b_noinsert; \ 133 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 134 if (nrow >= rmax) { \ 135 /* there is no extra room in row, therefore enlarge */ \ 136 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 137 PetscScalar *new_a; \ 138 \ 139 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 140 \ 141 /* malloc new storage space */ \ 142 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 143 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 144 new_j = (int*)(new_a + new_nz); \ 145 new_i = new_j + new_nz; \ 146 \ 147 /* copy over old data into new slots */ \ 148 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 149 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 150 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 151 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 152 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 153 len*sizeof(int));CHKERRQ(ierr); \ 154 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 155 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 156 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 157 /* free up old matrix storage */ \ 158 \ 159 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 160 if 
(!b->singlemalloc) { \ 161 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 162 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 163 } \ 164 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 165 b->singlemalloc = PETSC_TRUE; \ 166 \ 167 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 168 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 169 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 170 b->maxnz += CHUNKSIZE; \ 171 b->reallocs++; \ 172 } \ 173 N = nrow++ - 1; b->nz++; \ 174 /* shift up all the later entries in this row */ \ 175 for (ii=N; ii>=_i; ii--) { \ 176 rp[ii+1] = rp[ii]; \ 177 ap[ii+1] = ap[ii]; \ 178 } \ 179 rp[_i] = col1; \ 180 ap[_i] = value; \ 181 b_noinsert: ; \ 182 bilen[row] = nrow; \ 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatSetValues_MPIAIJ" 187 int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 188 { 189 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 190 PetscScalar value; 191 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 192 int cstart = aij->cstart,cend = aij->cend,row,col; 193 PetscTruth roworiented = aij->roworiented; 194 195 /* Some Variables required in the macro */ 196 Mat A = aij->A; 197 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 198 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 199 PetscScalar *aa = a->a; 200 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 201 Mat B = aij->B; 202 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 203 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 204 PetscScalar *ba = b->a; 205 206 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 207 int nonew = a->nonew,shift=0; 208 PetscScalar *ap; 209 210 PetscFunctionBegin; 211 for (i=0; i<m; i++) { 212 if (im[i] < 0) continue; 213 #if defined(PETSC_USE_BOPT_g) 214 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 215 #endif 
216 if (im[i] >= rstart && im[i] < rend) { 217 row = im[i] - rstart; 218 for (j=0; j<n; j++) { 219 if (in[j] >= cstart && in[j] < cend){ 220 col = in[j] - cstart; 221 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 222 if (ignorezeroentries && value == 0.0) continue; 223 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 224 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 225 } else if (in[j] < 0) continue; 226 #if defined(PETSC_USE_BOPT_g) 227 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 228 #endif 229 else { 230 if (mat->was_assembled) { 231 if (!aij->colmap) { 232 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 233 } 234 #if defined (PETSC_USE_CTABLE) 235 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 236 col--; 237 #else 238 col = aij->colmap[in[j]] - 1; 239 #endif 240 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 241 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 242 col = in[j]; 243 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 244 B = aij->B; 245 b = (Mat_SeqAIJ*)B->data; 246 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 247 ba = b->a; 248 } 249 } else col = in[j]; 250 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251 if (ignorezeroentries && value == 0.0) continue; 252 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 253 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 254 } 255 } 256 } else { 257 if (!aij->donotstash) { 258 if (roworiented) { 259 if (ignorezeroentries && v[i*n] == 0.0) continue; 260 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 261 } else { 262 if (ignorezeroentries && v[i] == 0.0) continue; 263 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 264 } 265 } 266 } 267 } 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ 
"MatGetValues_MPIAIJ" 273 int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 274 { 275 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 276 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 277 int cstart = aij->cstart,cend = aij->cend,row,col; 278 279 PetscFunctionBegin; 280 for (i=0; i<m; i++) { 281 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 282 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 283 if (idxm[i] >= rstart && idxm[i] < rend) { 284 row = idxm[i] - rstart; 285 for (j=0; j<n; j++) { 286 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]); 287 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 288 if (idxn[j] >= cstart && idxn[j] < cend){ 289 col = idxn[j] - cstart; 290 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 291 } else { 292 if (!aij->colmap) { 293 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 294 } 295 #if defined (PETSC_USE_CTABLE) 296 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 297 col --; 298 #else 299 col = aij->colmap[idxn[j]] - 1; 300 #endif 301 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 302 else { 303 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 304 } 305 } 306 } 307 } else { 308 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 309 } 310 } 311 PetscFunctionReturn(0); 312 } 313 314 #undef __FUNCT__ 315 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 316 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 317 { 318 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 319 int ierr,nstash,reallocs; 320 InsertMode addv; 321 322 PetscFunctionBegin; 323 if (aij->donotstash) { 324 PetscFunctionReturn(0); 325 } 326 327 /* make sure all processors are either in INSERTMODE or ADDMODE */ 328 ierr = 
MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 329 if (addv == (ADD_VALUES|INSERT_VALUES)) { 330 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 331 } 332 mat->insertmode = addv; /* in case this processor had no cache */ 333 334 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 335 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 336 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 337 PetscFunctionReturn(0); 338 } 339 340 341 #undef __FUNCT__ 342 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 343 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 344 { 345 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 346 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 347 int i,j,rstart,ncols,n,ierr,flg; 348 int *row,*col,other_disassembled; 349 PetscScalar *val; 350 InsertMode addv = mat->insertmode; 351 352 PetscFunctionBegin; 353 if (!aij->donotstash) { 354 while (1) { 355 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 356 if (!flg) break; 357 358 for (i=0; i<n;) { 359 /* Now identify the consecutive vals belonging to the same row */ 360 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 361 if (j < n) ncols = j-i; 362 else ncols = n-i; 363 /* Now assemble all these values with a single function call */ 364 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 365 i = j; 366 } 367 } 368 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 369 } 370 371 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 372 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 373 374 /* determine if any processor has disassembled, if so we must 375 also disassemble ourselfs, in order that we may reassemble. 
*/ 376 /* 377 if nonzero structure of submatrix B cannot change then we know that 378 no processor disassembled thus we can skip this stuff 379 */ 380 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 381 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 382 if (mat->was_assembled && !other_disassembled) { 383 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 384 } 385 } 386 387 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 388 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 389 } 390 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 391 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 392 393 if (aij->rowvalues) { 394 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 395 aij->rowvalues = 0; 396 } 397 398 /* used by MatAXPY() */ 399 a->xtoy = 0; b->xtoy = 0; 400 a->XtoY = 0; b->XtoY = 0; 401 402 PetscFunctionReturn(0); 403 } 404 405 #undef __FUNCT__ 406 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 407 int MatZeroEntries_MPIAIJ(Mat A) 408 { 409 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 410 int ierr; 411 412 PetscFunctionBegin; 413 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 414 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 415 PetscFunctionReturn(0); 416 } 417 418 #undef __FUNCT__ 419 #define __FUNCT__ "MatZeroRows_MPIAIJ" 420 int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 421 { 422 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 423 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 424 int *nprocs,j,idx,nsends,row; 425 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 426 int *rvalues,tag = A->tag,count,base,slen,n,*source; 427 int *lens,imdex,*lrows,*values,rstart=l->rstart; 428 MPI_Comm comm = A->comm; 429 MPI_Request *send_waits,*recv_waits; 430 MPI_Status recv_status,*send_status; 431 IS istmp; 432 PetscTruth found; 433 434 PetscFunctionBegin; 435 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 436 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 437 438 /* first count number of contributors to each 
processor */ 439 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 440 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 441 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 442 for (i=0; i<N; i++) { 443 idx = rows[i]; 444 found = PETSC_FALSE; 445 for (j=0; j<size; j++) { 446 if (idx >= owners[j] && idx < owners[j+1]) { 447 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 448 } 449 } 450 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 451 } 452 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 453 454 /* inform other processors of number of messages and max length*/ 455 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 456 457 /* post receives: */ 458 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 459 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 460 for (i=0; i<nrecvs; i++) { 461 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 462 } 463 464 /* do sends: 465 1) starts[i] gives the starting index in svalues for stuff going to 466 the ith processor 467 */ 468 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 469 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 470 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 471 starts[0] = 0; 472 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 473 for (i=0; i<N; i++) { 474 svalues[starts[owner[i]]++] = rows[i]; 475 } 476 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 477 478 starts[0] = 0; 479 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 480 count = 0; 481 for (i=0; i<size; i++) { 482 if (nprocs[2*i+1]) { 483 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 484 } 485 } 486 ierr = PetscFree(starts);CHKERRQ(ierr); 487 488 base = owners[rank]; 489 490 /* 
wait on receives */ 491 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 492 source = lens + nrecvs; 493 count = nrecvs; slen = 0; 494 while (count) { 495 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 496 /* unpack receives into our local space */ 497 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 498 source[imdex] = recv_status.MPI_SOURCE; 499 lens[imdex] = n; 500 slen += n; 501 count--; 502 } 503 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 504 505 /* move the data into the send scatter */ 506 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 507 count = 0; 508 for (i=0; i<nrecvs; i++) { 509 values = rvalues + i*nmax; 510 for (j=0; j<lens[i]; j++) { 511 lrows[count++] = values[j] - base; 512 } 513 } 514 ierr = PetscFree(rvalues);CHKERRQ(ierr); 515 ierr = PetscFree(lens);CHKERRQ(ierr); 516 ierr = PetscFree(owner);CHKERRQ(ierr); 517 ierr = PetscFree(nprocs);CHKERRQ(ierr); 518 519 /* actually zap the local rows */ 520 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 521 PetscLogObjectParent(A,istmp); 522 523 /* 524 Zero the required rows. If the "diagonal block" of the matrix 525 is square and the user wishes to set the diagonal we use seperate 526 code so that MatSetValues() is not called for each diagonal allocating 527 new memory, thus calling lots of mallocs and slowing things down. 
528 529 Contributed by: Mathew Knepley 530 */ 531 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 532 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 533 if (diag && (l->A->M == l->A->N)) { 534 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 535 } else if (diag) { 536 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 537 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 538 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 539 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 540 } 541 for (i = 0; i < slen; i++) { 542 row = lrows[i] + rstart; 543 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 544 } 545 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 546 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 547 } else { 548 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 549 } 550 ierr = ISDestroy(istmp);CHKERRQ(ierr); 551 ierr = PetscFree(lrows);CHKERRQ(ierr); 552 553 /* wait on sends */ 554 if (nsends) { 555 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 556 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 557 ierr = PetscFree(send_status);CHKERRQ(ierr); 558 } 559 ierr = PetscFree(send_waits);CHKERRQ(ierr); 560 ierr = PetscFree(svalues);CHKERRQ(ierr); 561 562 PetscFunctionReturn(0); 563 } 564 565 #undef __FUNCT__ 566 #define __FUNCT__ "MatMult_MPIAIJ" 567 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 568 { 569 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 570 int ierr,nt; 571 572 PetscFunctionBegin; 573 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 574 if (nt != A->n) { 575 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 576 } 577 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 578 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 579 ierr = 
VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 580 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 581 PetscFunctionReturn(0); 582 } 583 584 #undef __FUNCT__ 585 #define __FUNCT__ "MatMultAdd_MPIAIJ" 586 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 587 { 588 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 589 int ierr; 590 591 PetscFunctionBegin; 592 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 593 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 594 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 595 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 596 PetscFunctionReturn(0); 597 } 598 599 #undef __FUNCT__ 600 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 601 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 602 { 603 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 604 int ierr; 605 606 PetscFunctionBegin; 607 /* do nondiagonal part */ 608 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 609 /* send it on its way */ 610 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 611 /* do local part */ 612 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 613 /* receive remote parts: note this assumes the values are not actually */ 614 /* inserted in yy until the next line, which is true for my implementation*/ 615 /* but is not perhaps always true. 
*/ 616 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 617 PetscFunctionReturn(0); 618 } 619 620 EXTERN_C_BEGIN 621 #undef __FUNCT__ 622 #define __FUNCT__ "MatIsSymmetric_MPIAIJ" 623 int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f) 624 { 625 MPI_Comm comm; 626 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 627 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 628 IS Me,Notme; 629 int M,N,first,last,*notme,ntids,i, ierr; 630 631 PetscFunctionBegin; 632 633 /* Easy test: symmetric diagonal block */ 634 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 635 ierr = MatIsSymmetric(Adia,Bdia,f); CHKERRQ(ierr); 636 if (!*f) PetscFunctionReturn(0); 637 ierr = PetscObjectGetComm((PetscObject)Amat,&comm); CHKERRQ(ierr); 638 ierr = MPI_Comm_size(comm,&ntids); CHKERRQ(ierr); 639 if (ntids==1) PetscFunctionReturn(0); 640 641 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 642 ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr); 643 ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr); 644 ierr = PetscMalloc((N-last+first)*sizeof(int),¬me); CHKERRQ(ierr); 645 for (i=0; i<first; i++) notme[i] = i; 646 for (i=last; i<M; i++) notme[i-last+first] = i; 647 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr); 648 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr); 649 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs); CHKERRQ(ierr); 650 Aoff = Aoffs[0]; 651 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs); CHKERRQ(ierr); 652 Boff = Boffs[0]; 653 ierr = MatIsSymmetric(Aoff,Boff,f); CHKERRQ(ierr); 654 ierr = MatDestroyMatrices(1,&Aoffs); CHKERRQ(ierr); 655 ierr = MatDestroyMatrices(1,&Boffs); CHKERRQ(ierr); 656 ierr = ISDestroy(Me); CHKERRQ(ierr); 657 ierr = ISDestroy(Notme); CHKERRQ(ierr); 658 659 PetscFunctionReturn(0); 660 } 661 EXTERN_C_END 662 663 #undef __FUNCT__ 664 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 665 int 
MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 666 { 667 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 668 int ierr; 669 670 PetscFunctionBegin; 671 /* do nondiagonal part */ 672 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 673 /* send it on its way */ 674 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 675 /* do local part */ 676 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 677 /* receive remote parts: note this assumes the values are not actually */ 678 /* inserted in yy until the next line, which is true for my implementation*/ 679 /* but is not perhaps always true. */ 680 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 681 PetscFunctionReturn(0); 682 } 683 684 /* 685 This only works correctly for square matrices where the subblock A->A is the 686 diagonal block 687 */ 688 #undef __FUNCT__ 689 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 690 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 691 { 692 int ierr; 693 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 694 695 PetscFunctionBegin; 696 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 697 if (a->rstart != a->cstart || a->rend != a->cend) { 698 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 699 } 700 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 701 PetscFunctionReturn(0); 702 } 703 704 #undef __FUNCT__ 705 #define __FUNCT__ "MatScale_MPIAIJ" 706 int MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 707 { 708 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 709 int ierr; 710 711 PetscFunctionBegin; 712 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 713 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 714 PetscFunctionReturn(0); 715 } 716 717 #undef __FUNCT__ 718 #define __FUNCT__ "MatDestroy_MPIAIJ" 719 int MatDestroy_MPIAIJ(Mat mat) 720 { 721 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 722 int ierr; 723 724 PetscFunctionBegin; 725 #if defined(PETSC_USE_LOG) 726 
PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 727 #endif 728 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 729 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 730 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 731 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 732 #if defined (PETSC_USE_CTABLE) 733 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 734 #else 735 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 736 #endif 737 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 738 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 739 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 740 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 741 ierr = PetscFree(aij);CHKERRQ(ierr); 742 PetscFunctionReturn(0); 743 } 744 745 #undef __FUNCT__ 746 #define __FUNCT__ "MatView_MPIAIJ_Binary" 747 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 748 { 749 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 750 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 751 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 752 int nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag; 753 int nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 754 PetscScalar *column_values; 755 756 PetscFunctionBegin; 757 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 758 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 759 nz = A->nz + B->nz; 760 if (rank == 0) { 761 header[0] = MAT_FILE_COOKIE; 762 header[1] = mat->M; 763 header[2] = mat->N; 764 ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 765 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 766 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr); 767 /* get largest number of rows any processor has */ 768 rlen = mat->m; 769 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 770 for (i=1; i<size; i++) { 771 
rlen = PetscMax(rlen,range[i+1] - range[i]); 772 } 773 } else { 774 ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 775 rlen = mat->m; 776 } 777 778 /* load up the local row counts */ 779 ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr); 780 for (i=0; i<mat->m; i++) { 781 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 782 } 783 784 /* store the row lengths to the file */ 785 if (rank == 0) { 786 MPI_Status status; 787 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr); 788 for (i=1; i<size; i++) { 789 rlen = range[i+1] - range[i]; 790 ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 791 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr); 792 } 793 } else { 794 ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 795 } 796 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 797 798 /* load up the local column indices */ 799 nzmax = nz; /* )th processor needs space a largest processor needs */ 800 ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 801 ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr); 802 cnt = 0; 803 for (i=0; i<mat->m; i++) { 804 for (j=B->i[i]; j<B->i[i+1]; j++) { 805 if ( (col = garray[B->j[j]]) > cstart) break; 806 column_indices[cnt++] = col; 807 } 808 for (k=A->i[i]; k<A->i[i+1]; k++) { 809 column_indices[cnt++] = A->j[k] + cstart; 810 } 811 for (; j<B->i[i+1]; j++) { 812 column_indices[cnt++] = garray[B->j[j]]; 813 } 814 } 815 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 816 817 /* store the column indices to the file */ 818 if (rank == 0) { 819 MPI_Status status; 820 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr); 821 for (i=1; i<size; i++) { 822 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 823 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal 
PETSc error: nz = %d nzmax = %d",nz,nzmax); 824 ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 825 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr); 826 } 827 } else { 828 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 829 ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 830 } 831 ierr = PetscFree(column_indices);CHKERRQ(ierr); 832 833 /* load up the local column values */ 834 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 835 cnt = 0; 836 for (i=0; i<mat->m; i++) { 837 for (j=B->i[i]; j<B->i[i+1]; j++) { 838 if ( garray[B->j[j]] > cstart) break; 839 column_values[cnt++] = B->a[j]; 840 } 841 for (k=A->i[i]; k<A->i[i+1]; k++) { 842 column_values[cnt++] = A->a[k]; 843 } 844 for (; j<B->i[i+1]; j++) { 845 column_values[cnt++] = B->a[j]; 846 } 847 } 848 if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 849 850 /* store the column values to the file */ 851 if (rank == 0) { 852 MPI_Status status; 853 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr); 854 for (i=1; i<size; i++) { 855 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 856 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 857 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 858 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr); 859 } 860 } else { 861 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 862 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 863 } 864 ierr = PetscFree(column_values);CHKERRQ(ierr); 865 PetscFunctionReturn(0); 866 } 867 868 #undef __FUNCT__ 869 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 870 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 871 { 872 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 
873 int ierr,rank = aij->rank,size = aij->size; 874 PetscTruth isdraw,isascii,flg,isbinary; 875 PetscViewer sviewer; 876 PetscViewerFormat format; 877 878 PetscFunctionBegin; 879 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 880 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 881 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 882 if (isascii) { 883 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 884 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 885 MatInfo info; 886 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 887 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 888 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 889 if (flg) { 890 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 891 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 892 } else { 893 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 894 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 895 } 896 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 897 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 898 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 899 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 900 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 901 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 902 PetscFunctionReturn(0); 903 } else if (format == PETSC_VIEWER_ASCII_INFO) { 904 PetscFunctionReturn(0); 905 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 906 PetscFunctionReturn(0); 907 } 908 } else if (isbinary) { 909 if (size 
== 1) { 910 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 911 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 912 } else { 913 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 914 } 915 PetscFunctionReturn(0); 916 } else if (isdraw) { 917 PetscDraw draw; 918 PetscTruth isnull; 919 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 920 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 921 } 922 923 if (size == 1) { 924 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 925 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 926 } else { 927 /* assemble the entire matrix onto first processor. */ 928 Mat A; 929 Mat_SeqAIJ *Aloc; 930 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 931 PetscScalar *a; 932 933 if (!rank) { 934 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 935 } else { 936 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 937 } 938 PetscLogObjectParent(mat,A); 939 940 /* copy over the A part */ 941 Aloc = (Mat_SeqAIJ*)aij->A->data; 942 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 943 row = aij->rstart; 944 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 945 for (i=0; i<m; i++) { 946 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 947 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 948 } 949 aj = Aloc->j; 950 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 951 952 /* copy over the B part */ 953 Aloc = (Mat_SeqAIJ*)aij->B->data; 954 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 955 row = aij->rstart; 956 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 957 ct = cols; 958 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 959 for (i=0; i<m; i++) { 960 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 961 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 962 } 963 ierr = 
PetscFree(ct);CHKERRQ(ierr); 964 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 965 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 966 /* 967 Everyone has to call to draw the matrix since the graphics waits are 968 synchronized across all processors that share the PetscDraw object 969 */ 970 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 971 if (!rank) { 972 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 973 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 974 } 975 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 976 ierr = MatDestroy(A);CHKERRQ(ierr); 977 } 978 PetscFunctionReturn(0); 979 } 980 981 #undef __FUNCT__ 982 #define __FUNCT__ "MatView_MPIAIJ" 983 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 984 { 985 int ierr; 986 PetscTruth isascii,isdraw,issocket,isbinary; 987 988 PetscFunctionBegin; 989 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 990 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 991 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 992 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 993 if (isascii || isdraw || isbinary || issocket) { 994 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 995 } else { 996 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 997 } 998 PetscFunctionReturn(0); 999 } 1000 1001 1002 1003 #undef __FUNCT__ 1004 #define __FUNCT__ "MatRelax_MPIAIJ" 1005 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1006 { 1007 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1008 int ierr; 1009 Vec bb1; 1010 PetscScalar mone=-1.0; 1011 1012 PetscFunctionBegin; 1013 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation 
requires global its %d and local its %d both positive",its,lits); 1014 1015 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1016 1017 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1018 if (flag & SOR_ZERO_INITIAL_GUESS) { 1019 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1020 its--; 1021 } 1022 1023 while (its--) { 1024 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1025 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1026 1027 /* update rhs: bb1 = bb - B*x */ 1028 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1029 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1030 1031 /* local sweep */ 1032 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1033 CHKERRQ(ierr); 1034 } 1035 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1036 if (flag & SOR_ZERO_INITIAL_GUESS) { 1037 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1038 its--; 1039 } 1040 while (its--) { 1041 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1042 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1043 1044 /* update rhs: bb1 = bb - B*x */ 1045 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1046 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1047 1048 /* local sweep */ 1049 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1050 CHKERRQ(ierr); 1051 } 1052 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1053 if (flag & SOR_ZERO_INITIAL_GUESS) { 1054 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1055 its--; 1056 } 1057 while (its--) { 1058 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1059 ierr = 
VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x
         (lvec holds the scattered off-process entries of x; it was negated
          with VecScale(-1) above, so multadd computes bb + B*(-x)) */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep: apply the sequential relaxation on the diagonal block only */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else {
    /* only the block-Jacobi style SOR_LOCAL_* variants are implemented */
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatGetInfo_MPIAIJ - Gathers matrix statistics (nonzero counts, memory,
   malloc counts) for an MPIAIJ matrix by summing the local diagonal (A)
   and off-diagonal (B) blocks, then optionally reducing across the
   communicator with MPI_MAX or MPI_SUM depending on the requested flag.
*/
#undef __FUNCT__  
#define __FUNCT__ "MatGetInfo_MPIAIJ"
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Mat        A = mat->A,B = mat->B;
  int        ierr;
  PetscReal  isend[5],irecv[5];   /* local stats packed for a single reduction */

  PetscFunctionBegin;
  info->block_size     = 1.0;
  /* stats of the diagonal block ... */
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  /* ... plus the stats of the off-diagonal block */
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    /* per-field maximum over all ranks */
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    /* per-field sum over all ranks */
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
1111 info->nz_unneeded = irecv[2]; 1112 info->memory = irecv[3]; 1113 info->mallocs = irecv[4]; 1114 } 1115 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1116 info->fill_ratio_needed = 0; 1117 info->factor_mallocs = 0; 1118 info->rows_global = (double)matin->M; 1119 info->columns_global = (double)matin->N; 1120 info->rows_local = (double)matin->m; 1121 info->columns_local = (double)matin->N; 1122 1123 PetscFunctionReturn(0); 1124 } 1125 1126 #undef __FUNCT__ 1127 #define __FUNCT__ "MatSetOption_MPIAIJ" 1128 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1129 { 1130 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1131 int ierr; 1132 1133 PetscFunctionBegin; 1134 switch (op) { 1135 case MAT_NO_NEW_NONZERO_LOCATIONS: 1136 case MAT_YES_NEW_NONZERO_LOCATIONS: 1137 case MAT_COLUMNS_UNSORTED: 1138 case MAT_COLUMNS_SORTED: 1139 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1140 case MAT_KEEP_ZEROED_ROWS: 1141 case MAT_NEW_NONZERO_LOCATION_ERR: 1142 case MAT_USE_INODES: 1143 case MAT_DO_NOT_USE_INODES: 1144 case MAT_IGNORE_ZERO_ENTRIES: 1145 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1146 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1147 break; 1148 case MAT_ROW_ORIENTED: 1149 a->roworiented = PETSC_TRUE; 1150 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1151 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1152 break; 1153 case MAT_ROWS_SORTED: 1154 case MAT_ROWS_UNSORTED: 1155 case MAT_YES_NEW_DIAGONALS: 1156 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1157 break; 1158 case MAT_COLUMN_ORIENTED: 1159 a->roworiented = PETSC_FALSE; 1160 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1161 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1162 break; 1163 case MAT_IGNORE_OFF_PROC_ENTRIES: 1164 a->donotstash = PETSC_TRUE; 1165 break; 1166 case MAT_NO_NEW_DIAGONALS: 1167 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1168 default: 1169 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1170 } 1171 PetscFunctionReturn(0); 1172 } 1173 1174 #undef __FUNCT__ 1175 #define __FUNCT__ "MatGetRow_MPIAIJ" 
/*
   MatGetRow_MPIAIJ - Returns one locally-owned row of the parallel matrix,
   merging the diagonal block (A, local column numbering) and the
   off-diagonal block (B, compressed columns mapped through garray) into a
   single list in increasing global column order.  Only rows in
   [rstart,rend) may be requested.  The idx/v output arrays point into a
   per-matrix scratch buffer, so only one row may be "gotten" at a time
   (enforced via getrowactive).
*/
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int         i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int         nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int         *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int        max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      /* combined A+B nonzero count of local row i */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation: values first, column indices appended after them */
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  /* only request the pieces (values/indices) the caller actually asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries with global column < cstart come before the A block */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;   /* number of B entries left of the diagonal block */
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed while copying values above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          /* values were not requested, so find the split point here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        /* A-block columns are local; shift by cstart to get global numbering */
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      /* empty row: hand back null pointers, not stale scratch space */
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatRestoreRow_MPIAIJ - Releases the row obtained with MatGetRow_MPIAIJ.
   The scratch buffers are owned by the matrix, so only the active flag
   needs to be reset.
*/
#undef __FUNCT__  
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*
   MatNorm_MPIAIJ - Computes the Frobenius, one, or infinity norm of the
   parallel matrix from the local A (diagonal) and B (off-diagonal) blocks,
   combining the per-process contributions with MPI reductions.  NORM_2 is
   not supported.
*/
#undef __FUNCT__  
#define __FUNCT__ "MatNorm_MPIAIJ"
int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  int        ierr,i,j,cstart = aij->cstart;
  PetscReal  sum = 0.0;
  PetscScalar *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single process: defer entirely to the sequential implementation */
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of squares over both local blocks, reduced below */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr =
MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      /* NOTE(review): allocates two arrays of global column length (mat->N)
         on every process, so this path is not memory-scalable */
      PetscReal *tmp,*tmp2;
      int       *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      /* accumulate |a_ij| per global column: A columns shifted by cstart */
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
      }
      /* B columns mapped through garray to global numbering */
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      /* rows are wholly local, so take the local max then reduce with MPI_MAX */
      for (j=0; j<aij->A->m; j++) {
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

/*
   MatTranspose_MPIAIJ - Forms the transpose of an MPIAIJ matrix by
   inserting each local row as a column of a new parallel matrix via
   MatSetValues.  With matout == PETSC_NULL the transpose replaces A
   in place (square matrices only).
*/
#undef __FUNCT__  
#define __FUNCT__ "MatTranspose_MPIAIJ"
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ  *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ  *Aloc = (Mat_SeqAIJ*)a->A->data;
  int         ierr;
  int         M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat         B;
  PetscScalar *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  /* B has the transposed layout: local sizes and global sizes swapped */
  ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* temporarily shift A's column indices to global numbering (undone below) */
  for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
  for (i=0; i<m; i++) {
    /* insert local row `row` of A as column `row` of B */
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  /* restore A's column indices to local numbering */
  for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* off-diagonal columns are compressed; expand through garray into a scratch array */
  ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
  ct = cols;   /* keep the base pointer: cols itself is advanced in the loop */
  for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    /* in-place: replace A's guts with B's, destroying the old data */
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   MatDiagonalScale_MPIAIJ - Computes mat = diag(ll)*mat*diag(rr).
   Left scaling is purely local; right scaling of the off-diagonal block
   needs the off-process entries of rr, so the scatter of rr is started
   first and overlapped with the local scaling work.
*/
#undef __FUNCT__  
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        a = aij->A,b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation.
 */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left-scale the off-diagonal block while the scatter is in flight */
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}


/*
   MatPrintHelp_MPIAIJ - Prints the runtime options help; only rank 0
   prints, delegating to the sequential AIJ help routine.
*/
#undef __FUNCT__  
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* MatGetBlockSize_MPIAIJ - AIJ matrices always have block size 1. */
#undef __FUNCT__  
#define __FUNCT__ "MatGetBlockSize_MPIAIJ"
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}
/*
   MatSetUnfactored_MPIAIJ - Clears the factored state; only the diagonal
   block carries factorization data, so only a->A is reset.
*/
#undef __FUNCT__  
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatEqual_MPIAIJ - Compares two MPIAIJ matrices block-wise (diagonal
   parts, then off-diagonal parts) and combines the local verdicts with
   a logical-AND reduction so every process returns the same answer.
*/
#undef __FUNCT__  
#define __FUNCT__ "MatEqual_MPIAIJ"
int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat        a,b,c,d;
  PetscTruth flg;
  int        ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    /* only compare off-diagonal parts if the diagonal parts matched */
    ierr =
MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  /* NOTE(review): reduces a PetscTruth buffer with MPI_INT/MPI_LAND —
     relies on PetscTruth being an int-sized enum; confirm if types change */
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatCopy_MPIAIJ - Copies A into B.  The fast path copies the two
   sequential blocks directly; it is only valid when the nonzero patterns
   match and both matrices use this same copy implementation.
*/
#undef __FUNCT__  
#define __FUNCT__ "MatCopy_MPIAIJ"
int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   MatSetUpPreallocation_MPIAIJ - Fallback preallocation used when the
   caller never called MatMPIAIJSetPreallocation(); installs the default
   per-row estimates.
*/
#undef __FUNCT__  
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
int MatSetUpPreallocation_MPIAIJ(Mat A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
/*
   MatAXPY_MPIAIJ - Computes Y = a*X + Y.  With SAME_NONZERO_PATTERN the
   value arrays of the blocks are combined directly with BLAS axpy; with
   SUBSET_NONZERO_PATTERN a cached X-to-Y index map (xtoy) is used for the
   off-diagonal block; otherwise the generic MatAXPY_Basic path is taken.
*/
#undef __FUNCT__  
#define __FUNCT__ "MatAXPY_MPIAIJ"
int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
{
  int        ierr,one=1,i;
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    /* identical patterns: y->a += a * x->a for both blocks */
    x = (Mat_SeqAIJ *)xx->A->data;
    y = (Mat_SeqAIJ *)yy->A->data;
    BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
    x =
(Mat_SeqAIJ *)xx->B->data; 1535 y = (Mat_SeqAIJ *)yy->B->data; 1536 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1537 } else if (str == SUBSET_NONZERO_PATTERN) { 1538 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1539 1540 x = (Mat_SeqAIJ *)xx->B->data; 1541 y = (Mat_SeqAIJ *)yy->B->data; 1542 if (y->xtoy && y->XtoY != xx->B) { 1543 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1544 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1545 } 1546 if (!y->xtoy) { /* get xtoy */ 1547 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1548 y->XtoY = xx->B; 1549 } 1550 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1551 } else { 1552 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1553 } 1554 PetscFunctionReturn(0); 1555 } 1556 1557 /* -------------------------------------------------------------------*/ 1558 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1559 MatGetRow_MPIAIJ, 1560 MatRestoreRow_MPIAIJ, 1561 MatMult_MPIAIJ, 1562 /* 4*/ MatMultAdd_MPIAIJ, 1563 MatMultTranspose_MPIAIJ, 1564 MatMultTransposeAdd_MPIAIJ, 1565 0, 1566 0, 1567 0, 1568 /*10*/ 0, 1569 0, 1570 0, 1571 MatRelax_MPIAIJ, 1572 MatTranspose_MPIAIJ, 1573 /*15*/ MatGetInfo_MPIAIJ, 1574 MatEqual_MPIAIJ, 1575 MatGetDiagonal_MPIAIJ, 1576 MatDiagonalScale_MPIAIJ, 1577 MatNorm_MPIAIJ, 1578 /*20*/ MatAssemblyBegin_MPIAIJ, 1579 MatAssemblyEnd_MPIAIJ, 1580 0, 1581 MatSetOption_MPIAIJ, 1582 MatZeroEntries_MPIAIJ, 1583 /*25*/ MatZeroRows_MPIAIJ, 1584 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1585 MatLUFactorSymbolic_MPIAIJ_TFS, 1586 #else 1587 0, 1588 #endif 1589 0, 1590 0, 1591 0, 1592 /*30*/ MatSetUpPreallocation_MPIAIJ, 1593 0, 1594 0, 1595 0, 1596 0, 1597 /*35*/ MatDuplicate_MPIAIJ, 1598 0, 1599 0, 1600 0, 1601 0, 1602 /*40*/ MatAXPY_MPIAIJ, 1603 MatGetSubMatrices_MPIAIJ, 1604 MatIncreaseOverlap_MPIAIJ, 1605 MatGetValues_MPIAIJ, 1606 MatCopy_MPIAIJ, 1607 /*45*/ MatPrintHelp_MPIAIJ, 1608 MatScale_MPIAIJ, 
1609 0, 1610 0, 1611 0, 1612 /*50*/ MatGetBlockSize_MPIAIJ, 1613 0, 1614 0, 1615 0, 1616 0, 1617 /*55*/ MatFDColoringCreate_MPIAIJ, 1618 0, 1619 MatSetUnfactored_MPIAIJ, 1620 0, 1621 0, 1622 /*60*/ MatGetSubMatrix_MPIAIJ, 1623 MatDestroy_MPIAIJ, 1624 MatView_MPIAIJ, 1625 MatGetPetscMaps_Petsc, 1626 0, 1627 /*65*/ 0, 1628 0, 1629 0, 1630 0, 1631 0, 1632 /*70*/ 0, 1633 0, 1634 MatSetColoring_MPIAIJ, 1635 MatSetValuesAdic_MPIAIJ, 1636 MatSetValuesAdifor_MPIAIJ, 1637 /*75*/ 0, 1638 0, 1639 0, 1640 0, 1641 0, 1642 /*80*/ 0, 1643 0, 1644 0, 1645 0, 1646 0, 1647 /*85*/ MatLoad_MPIAIJ}; 1648 1649 /* ----------------------------------------------------------------------------------------*/ 1650 1651 EXTERN_C_BEGIN 1652 #undef __FUNCT__ 1653 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1654 int MatStoreValues_MPIAIJ(Mat mat) 1655 { 1656 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1657 int ierr; 1658 1659 PetscFunctionBegin; 1660 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1661 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1662 PetscFunctionReturn(0); 1663 } 1664 EXTERN_C_END 1665 1666 EXTERN_C_BEGIN 1667 #undef __FUNCT__ 1668 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1669 int MatRetrieveValues_MPIAIJ(Mat mat) 1670 { 1671 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1672 int ierr; 1673 1674 PetscFunctionBegin; 1675 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1676 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1677 PetscFunctionReturn(0); 1678 } 1679 EXTERN_C_END 1680 1681 #include "petscpc.h" 1682 EXTERN_C_BEGIN 1683 #undef __FUNCT__ 1684 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1685 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 1686 { 1687 Mat_MPIAIJ *b; 1688 int ierr,i; 1689 1690 PetscFunctionBegin; 1691 B->preallocated = PETSC_TRUE; 1692 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1693 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1694 if (d_nz < 0) 
SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1695 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1696 if (d_nnz) { 1697 for (i=0; i<B->m; i++) { 1698 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 1699 } 1700 } 1701 if (o_nnz) { 1702 for (i=0; i<B->m; i++) { 1703 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 1704 } 1705 } 1706 b = (Mat_MPIAIJ*)B->data; 1707 1708 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1709 PetscLogObjectParent(B,b->A); 1710 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1711 PetscLogObjectParent(B,b->B); 1712 1713 PetscFunctionReturn(0); 1714 } 1715 EXTERN_C_END 1716 1717 /*MC 1718 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 1719 1720 Options Database Keys: 1721 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 1722 1723 Level: beginner 1724 1725 .seealso: MatCreateMPIAIJ 1726 M*/ 1727 1728 EXTERN_C_BEGIN 1729 #undef __FUNCT__ 1730 #define __FUNCT__ "MatCreate_MPIAIJ" 1731 int MatCreate_MPIAIJ(Mat B) 1732 { 1733 Mat_MPIAIJ *b; 1734 int ierr,i,size; 1735 1736 PetscFunctionBegin; 1737 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1738 1739 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1740 B->data = (void*)b; 1741 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1742 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1743 B->factor = 0; 1744 B->assembled = PETSC_FALSE; 1745 B->mapping = 0; 1746 1747 B->insertmode = NOT_SET_VALUES; 1748 b->size = size; 1749 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1750 1751 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1752 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1753 1754 /* the information in the maps duplicates the information computed below, eventually 1755 we should remove the duplicate information that is not contained in the maps */ 1756 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1757 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1758 1759 /* build local table of row and column ownerships */ 1760 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1761 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1762 b->cowners = b->rowners + b->size + 2; 1763 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1764 b->rowners[0] = 0; 1765 for (i=2; i<=b->size; i++) { 1766 b->rowners[i] += b->rowners[i-1]; 1767 } 1768 b->rstart = b->rowners[b->rank]; 1769 b->rend = b->rowners[b->rank+1]; 1770 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1771 b->cowners[0] = 0; 1772 for (i=2; i<=b->size; 
i++) { 1773 b->cowners[i] += b->cowners[i-1]; 1774 } 1775 b->cstart = b->cowners[b->rank]; 1776 b->cend = b->cowners[b->rank+1]; 1777 1778 /* build cache for off array entries formed */ 1779 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1780 b->donotstash = PETSC_FALSE; 1781 b->colmap = 0; 1782 b->garray = 0; 1783 b->roworiented = PETSC_TRUE; 1784 1785 /* stuff used for matrix vector multiply */ 1786 b->lvec = PETSC_NULL; 1787 b->Mvctx = PETSC_NULL; 1788 1789 /* stuff for MatGetRow() */ 1790 b->rowindices = 0; 1791 b->rowvalues = 0; 1792 b->getrowactive = PETSC_FALSE; 1793 1794 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1795 "MatStoreValues_MPIAIJ", 1796 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1797 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1798 "MatRetrieveValues_MPIAIJ", 1799 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1800 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1801 "MatGetDiagonalBlock_MPIAIJ", 1802 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1803 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C", 1804 "MatIsSymmetric_MPIAIJ", 1805 MatIsSymmetric_MPIAIJ); CHKERRQ(ierr); 1806 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 1807 "MatMPIAIJSetPreallocation_MPIAIJ", 1808 MatMPIAIJSetPreallocation_MPIAIJ); CHKERRQ(ierr); 1809 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 1810 "MatDiagonalScaleLocal_MPIAIJ", 1811 MatDiagonalScaleLocal_MPIAIJ); CHKERRQ(ierr); 1812 PetscFunctionReturn(0); 1813 } 1814 EXTERN_C_END 1815 1816 /*MC 1817 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 1818 1819 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 1820 and MATMPIAIJ otherwise. 1821 1822 Options Database Keys: 1823 . 
   -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_AIJ"
/*
   MatCreate_AIJ - Constructor for the generic "aij" type: dispatches to
   MATSEQAIJ on a single-process communicator and MATMPIAIJ otherwise.
*/
int MatCreate_AIJ(Mat A) {
  int ierr,size;

  PetscFunctionBegin;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
/*
   MatDuplicate_MPIAIJ - Duplicates an MPIAIJ matrix, copying the ownership
   information, column map, scatter context, and (depending on cpvalues)
   the numerical values of the diagonal (A) and off-diagonal (B) blocks.
*/
int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  int        ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
  a = (Mat_MPIAIJ*)mat->data;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;   /* the duplicate starts out assembled */
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  /* copy the scalar layout/ownership fields */
  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  /* rowners/cowners share one allocation of 2*(size+2) ints (see MatCreate_MPIAIJ) */
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    int len;
    len  = oldmat->B->n;   /* number of off-diagonal (ghost) columns */
    ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,len*sizeof(int));
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->B);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
/*
   MatLoad_MPIAIJ - Loads a matrix stored in PETSc binary format from a viewer.

   Process 0 reads the header, row lengths, column indices, and values from the
   file and ships each process its portion with MPI_Scatterv/MPI_Send; the other
   processes only receive.  The send/receive ordering below is matched pairwise
   between rank 0 and each other rank and must not be reordered.
*/
int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  PetscScalar  *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          i,nz,ierr,j,rstart,rend,fd;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    /* only rank 0 touches the file; header = [cookie, M, N, nz-or-format-flag] */
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows: M/size each, remainder spread over low ranks */
  m = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors; ourlens and offlens share one allocation */
  ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    /* rowners doubles as the displacement array since counts are contiguous */
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix: temporarily subtract the off-diagonal counts so
     ourlens holds the diagonal-block lengths for preallocation */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);

  /* file stores each row's columns sorted, so tell the matrix */
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  /* restore ourlens to full row lengths for the insertion loops below */
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' values and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()

  csize is the requested number of local columns of the result (or
  PETSC_DECIDE); call selects MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX.  On
  reuse, the per-process sequential submatrix is retrieved from the
  "SubMatrix" composed object stored on *newmat by the initial call.
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int         ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  int         *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat         *local,M,Mreuse;
  PetscScalar *vwork,*aa;
  MPI_Comm    comm = mat->comm;
  Mat_SeqAIJ  *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* recover the sequential submatrix saved by the MAT_INITIAL_MATRIX call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
     m - number of local rows
     n - number of columns (same on all processors)
     rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum of the local column counts gives this rank's column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
    }

    /* next, compute all the lengths; dlens and olens share one allocation */
    ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  /* walk the sequential submatrix row by row and insert into the parallel result */
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           You must leave room for the diagonal entry even if it is zero.
. 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2242 submatrix (same value is used for all local rows). 2243 - o_nnz - array containing the number of nonzeros in the various rows of the 2244 OFF-DIAGONAL portion of the local submatrix (possibly different for 2245 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2246 structure. The size of this array is equal to the number 2247 of local rows, i.e 'm'. 2248 2249 The AIJ format (also called the Yale sparse matrix format or 2250 compressed row storage), is fully compatible with standard Fortran 77 2251 storage. That is, the stored row and column indices can begin at 2252 either one (as in Fortran) or zero. See the users manual for details. 2253 2254 The user MUST specify either the local or global matrix dimensions 2255 (possibly both). 2256 2257 The parallel matrix is partitioned such that the first m0 rows belong to 2258 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2259 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2260 2261 The DIAGONAL portion of the local submatrix of a processor can be defined 2262 as the submatrix which is obtained by extraction the part corresponding 2263 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2264 first row that belongs to the processor, and r2 is the last row belonging 2265 to the this processor. This is a square mxm matrix. The remaining portion 2266 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2267 2268 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2269 2270 By default, this format uses inodes (identical nodes) when possible. 2271 We search for consecutive rows with the same nonzero structure, thereby 2272 reusing matrix information to achieve increased efficiency. 2273 2274 Options Database Keys: 2275 + -mat_aij_no_inode - Do not use inodes 2276 . 
-mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2277 - -mat_aij_oneindex - Internally use indexing starting at 1 2278 rather than 0. Note that when calling MatSetValues(), 2279 the user still MUST index entries starting at 0! 2280 2281 Example usage: 2282 2283 Consider the following 8x8 matrix with 34 non-zero values, that is 2284 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2285 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2286 as follows: 2287 2288 .vb 2289 1 2 0 | 0 3 0 | 0 4 2290 Proc0 0 5 6 | 7 0 0 | 8 0 2291 9 0 10 | 11 0 0 | 12 0 2292 ------------------------------------- 2293 13 0 14 | 15 16 17 | 0 0 2294 Proc1 0 18 0 | 19 20 21 | 0 0 2295 0 0 0 | 22 23 0 | 24 0 2296 ------------------------------------- 2297 Proc2 25 26 27 | 0 0 28 | 29 0 2298 30 0 0 | 31 32 33 | 0 34 2299 .ve 2300 2301 This can be represented as a collection of submatrices as: 2302 2303 .vb 2304 A B C 2305 D E F 2306 G H I 2307 .ve 2308 2309 Where the submatrices A,B,C are owned by proc0, D,E,F are 2310 owned by proc1, G,H,I are owned by proc2. 2311 2312 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2313 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2314 The 'M','N' parameters are 8,8, and have the same values on all procs. 2315 2316 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2317 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2318 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2319 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2320 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2321 matrix, ans [DF] as another SeqAIJ matrix. 2322 2323 When d_nz, o_nz parameters are specified, d_nz storage elements are 2324 allocated for every row of the local diagonal submatrix, and o_nz 2325 storage locations are allocated for every row of the OFF-DIAGONAL submat. 
2326 One way to choose d_nz and o_nz is to use the max nonzerors per local 2327 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2328 In this case, the values of d_nz,o_nz are: 2329 .vb 2330 proc0 : dnz = 2, o_nz = 2 2331 proc1 : dnz = 3, o_nz = 2 2332 proc2 : dnz = 1, o_nz = 4 2333 .ve 2334 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2335 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2336 for proc3. i.e we are using 12+15+10=37 storage locations to store 2337 34 values. 2338 2339 When d_nnz, o_nnz parameters are specified, the storage is specified 2340 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2341 In the above case the values for d_nnz,o_nnz are: 2342 .vb 2343 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2344 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2345 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2346 .ve 2347 Here the space allocated is sum of all the above values i.e 34, and 2348 hence pre-allocation is perfect. 2349 2350 Level: intermediate 2351 2352 .keywords: matrix, aij, compressed row, sparse, parallel 2353 2354 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2355 @*/ 2356 int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 2357 { 2358 int ierr,(*f)(Mat,int,const int[],int,const int[]); 2359 2360 PetscFunctionBegin; 2361 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2362 if (f) { 2363 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2364 } 2365 PetscFunctionReturn(0); 2366 } 2367 2368 #undef __FUNCT__ 2369 #define __FUNCT__ "MatCreateMPIAIJ" 2370 /*@C 2371 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2372 (the default parallel PETSc format). For good matrix assembly performance 2373 the user should preallocate the matrix storage by setting the parameters 2374 d_nz (or d_nnz) and o_nz (or o_nnz). 
By setting these parameters accurately, 2375 performance can be increased by more than a factor of 50. 2376 2377 Collective on MPI_Comm 2378 2379 Input Parameters: 2380 + comm - MPI communicator 2381 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2382 This value should be the same as the local size used in creating the 2383 y vector for the matrix-vector product y = Ax. 2384 . n - This value should be the same as the local size used in creating the 2385 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2386 calculated if N is given) For square matrices n is almost always m. 2387 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2388 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2389 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2390 (same value is used for all local rows) 2391 . d_nnz - array containing the number of nonzeros in the various rows of the 2392 DIAGONAL portion of the local submatrix (possibly different for each row) 2393 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2394 The size of this array is equal to the number of local rows, i.e 'm'. 2395 You must leave room for the diagonal entry even if it is zero. 2396 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2397 submatrix (same value is used for all local rows). 2398 - o_nnz - array containing the number of nonzeros in the various rows of the 2399 OFF-DIAGONAL portion of the local submatrix (possibly different for 2400 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2401 structure. The size of this array is equal to the number 2402 of local rows, i.e 'm'. 2403 2404 Output Parameter: 2405 . 
A - the matrix 2406 2407 Notes: 2408 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2409 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2410 storage requirements for this matrix. 2411 2412 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2413 processor than it must be used on all processors that share the object for 2414 that argument. 2415 2416 The AIJ format (also called the Yale sparse matrix format or 2417 compressed row storage), is fully compatible with standard Fortran 77 2418 storage. That is, the stored row and column indices can begin at 2419 either one (as in Fortran) or zero. See the users manual for details. 2420 2421 The user MUST specify either the local or global matrix dimensions 2422 (possibly both). 2423 2424 The parallel matrix is partitioned such that the first m0 rows belong to 2425 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2426 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2427 2428 The DIAGONAL portion of the local submatrix of a processor can be defined 2429 as the submatrix which is obtained by extraction the part corresponding 2430 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2431 first row that belongs to the processor, and r2 is the last row belonging 2432 to the this processor. This is a square mxm matrix. The remaining portion 2433 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2434 2435 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2436 2437 When calling this routine with a single process communicator, a matrix of 2438 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2439 type of communicator, use the construction mechanism: 2440 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2441 2442 By default, this format uses inodes (identical nodes) when possible. 
2443 We search for consecutive rows with the same nonzero structure, thereby 2444 reusing matrix information to achieve increased efficiency. 2445 2446 Options Database Keys: 2447 + -mat_aij_no_inode - Do not use inodes 2448 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2449 - -mat_aij_oneindex - Internally use indexing starting at 1 2450 rather than 0. Note that when calling MatSetValues(), 2451 the user still MUST index entries starting at 0! 2452 2453 2454 Example usage: 2455 2456 Consider the following 8x8 matrix with 34 non-zero values, that is 2457 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2458 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2459 as follows: 2460 2461 .vb 2462 1 2 0 | 0 3 0 | 0 4 2463 Proc0 0 5 6 | 7 0 0 | 8 0 2464 9 0 10 | 11 0 0 | 12 0 2465 ------------------------------------- 2466 13 0 14 | 15 16 17 | 0 0 2467 Proc1 0 18 0 | 19 20 21 | 0 0 2468 0 0 0 | 22 23 0 | 24 0 2469 ------------------------------------- 2470 Proc2 25 26 27 | 0 0 28 | 29 0 2471 30 0 0 | 31 32 33 | 0 34 2472 .ve 2473 2474 This can be represented as a collection of submatrices as: 2475 2476 .vb 2477 A B C 2478 D E F 2479 G H I 2480 .ve 2481 2482 Where the submatrices A,B,C are owned by proc0, D,E,F are 2483 owned by proc1, G,H,I are owned by proc2. 2484 2485 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2486 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2487 The 'M','N' parameters are 8,8, and have the same values on all procs. 2488 2489 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2490 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2491 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2492 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2493 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2494 matrix, ans [DF] as another SeqAIJ matrix. 

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values i.e. 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* on a one-process communicator a SEQAIJ matrix is created instead (see Notes) */
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
/*
   MatMPIAIJGetSeqAIJ - Returns the internal diagonal (Ad) and off-diagonal (Ao)
   SeqAIJ blocks of an MPIAIJ matrix, plus the garray mapping from Ao's local
   column numbers to global column numbers.  The references returned are to the
   internal objects; no copies are made.
*/
int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
/*
   MatSetColoring_MPIAIJ - Installs a coloring on the diagonal and off-diagonal
   SeqAIJ blocks.  For IS_COLORING_LOCAL the colors are globally gathered so
   ghost columns can be colored; for IS_COLORING_GHOSTED the supplied colors
   are indexed through the matrix's global-to-local mapping.
*/
int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  int        ierr,i;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_LOCAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: gather all colors, then pick out
       those of our ghost columns via garray */
    ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    int             *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion: map our owned global columns to
       local indices into coloring->colors (larray is mapped in place) */
    ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      larray[i] = i + a->cstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: map the ghost columns (garray) */
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
/* MatSetValuesAdic_MPIAIJ - forwards ADIC-differentiated values to both SeqAIJ blocks */
int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
/* MatSetValuesAdifor_MPIAIJ - forwards ADIFOR-differentiated values to both SeqAIJ blocks */
int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@C
      MatMerge - Creates a single large PETSc matrix by concatenating sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicators the parallel matrix will live on
-    inmat - the input sequential matrices

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix in EACH of the separate files
    MUST be the same.  The input matrix inmat is destroyed by this routine;
    the caller must not use or destroy it afterwards.
2666 2667 @*/ 2668 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat) 2669 { 2670 int ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz; 2671 PetscScalar *values; 2672 PetscMap columnmap,rowmap; 2673 2674 PetscFunctionBegin; 2675 2676 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr); 2677 2678 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2679 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2680 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr); 2681 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2682 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2683 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2684 2685 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2686 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2687 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2688 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2689 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2690 2691 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2692 for (i=0;i<m;i++) { 2693 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2694 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2695 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2696 } 2697 ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr); 2698 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2699 2700 for (i=0;i<m;i++) { 2701 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2702 I = i + rstart; 2703 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2704 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2705 } 2706 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2707 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2708 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2709 2710 PetscFunctionReturn(0); 2711 } 2712 2713 #undef __FUNCT__ 2714 #define __FUNCT__ "MatFileSplit" 
2715 int MatFileSplit(Mat A,char *outfile) 2716 { 2717 int ierr,rank,len,m,N,i,rstart,*indx,nnz; 2718 PetscViewer out; 2719 char *name; 2720 Mat B; 2721 PetscScalar *values; 2722 2723 PetscFunctionBegin; 2724 2725 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2726 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2727 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr); 2728 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2729 for (i=0;i<m;i++) { 2730 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2731 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2732 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2733 } 2734 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2735 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2736 2737 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2738 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2739 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2740 sprintf(name,"%s.%d",outfile,rank); 2741 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr); 2742 ierr = PetscFree(name); 2743 ierr = MatView(B,out);CHKERRQ(ierr); 2744 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2745 ierr = MatDestroy(B);CHKERRQ(ierr); 2746 PetscFunctionReturn(0); 2747 } 2748