1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/ 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/spops.h" 6 7 /* 8 Local utility routine that creates a mapping from the global column 9 number to the local number in the off-diagonal part of the local 10 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 11 a slightly higher hash table cost; without it it is not scalable (each processor 12 has an order N integer array but is fast to acess. 13 */ 14 #undef __FUNCT__ 15 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 16 int CreateColmap_MPIAIJ_Private(Mat mat) 17 { 18 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 19 int n = aij->B->n,i,ierr; 20 21 PetscFunctionBegin; 22 #if defined (PETSC_USE_CTABLE) 23 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 24 for (i=0; i<n; i++){ 25 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 26 } 27 #else 28 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 29 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 30 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 31 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 32 #endif 33 PetscFunctionReturn(0); 34 } 35 36 #define CHUNKSIZE 15 37 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 38 { \ 39 \ 40 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 41 rmax = aimax[row]; nrow = ailen[row]; \ 42 col1 = col - shift; \ 43 \ 44 low = 0; high = nrow; \ 45 while (high-low > 5) { \ 46 t = (low+high)/2; \ 47 if (rp[t] > col) high = t; \ 48 else low = t; \ 49 } \ 50 for (_i=low; _i<high; _i++) { \ 51 if (rp[_i] > col1) break; \ 52 if (rp[_i] == col1) { \ 53 if (addv == ADD_VALUES) ap[_i] += value; \ 54 else ap[_i] = value; \ 55 goto a_noinsert; \ 56 } \ 57 } \ 58 if (nonew == 1) goto a_noinsert; \ 59 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 60 if (nrow >= rmax) { \ 61 /* there is no extra room in row, therefore enlarge */ \ 62 int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 63 PetscScalar *new_a; \ 64 \ 65 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 66 \ 67 /* malloc new storage space */ \ 68 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \ 69 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 70 new_j = (int*)(new_a + new_nz); \ 71 new_i = new_j + new_nz; \ 72 \ 73 /* copy over old data into new slots */ \ 74 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 75 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 76 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 77 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 78 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 79 len*sizeof(int));CHKERRQ(ierr); \ 80 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 81 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 82 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 83 /* free up old matrix storage */ \ 84 \ 85 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 86 if (!a->singlemalloc) { \ 87 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 88 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 89 } \ 90 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 91 a->singlemalloc = PETSC_TRUE; \ 92 \ 93 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 94 rmax = aimax[row] = aimax[row] + 
CHUNKSIZE; \ 95 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 96 a->maxnz += CHUNKSIZE; \ 97 a->reallocs++; \ 98 } \ 99 N = nrow++ - 1; a->nz++; \ 100 /* shift up all the later entries in this row */ \ 101 for (ii=N; ii>=_i; ii--) { \ 102 rp[ii+1] = rp[ii]; \ 103 ap[ii+1] = ap[ii]; \ 104 } \ 105 rp[_i] = col1; \ 106 ap[_i] = value; \ 107 a_noinsert: ; \ 108 ailen[row] = nrow; \ 109 } 110 111 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 112 { \ 113 \ 114 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 115 rmax = bimax[row]; nrow = bilen[row]; \ 116 col1 = col - shift; \ 117 \ 118 low = 0; high = nrow; \ 119 while (high-low > 5) { \ 120 t = (low+high)/2; \ 121 if (rp[t] > col) high = t; \ 122 else low = t; \ 123 } \ 124 for (_i=low; _i<high; _i++) { \ 125 if (rp[_i] > col1) break; \ 126 if (rp[_i] == col1) { \ 127 if (addv == ADD_VALUES) ap[_i] += value; \ 128 else ap[_i] = value; \ 129 goto b_noinsert; \ 130 } \ 131 } \ 132 if (nonew == 1) goto b_noinsert; \ 133 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 134 if (nrow >= rmax) { \ 135 /* there is no extra room in row, therefore enlarge */ \ 136 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 137 PetscScalar *new_a; \ 138 \ 139 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 140 \ 141 /* malloc new storage space */ \ 142 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 143 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 144 new_j = (int*)(new_a + new_nz); \ 145 new_i = new_j + new_nz; \ 146 \ 147 /* copy over old data into new slots */ \ 148 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 149 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 150 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 151 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 152 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 153 len*sizeof(int));CHKERRQ(ierr); \ 154 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 155 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 156 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 157 /* free up old matrix storage */ \ 158 \ 159 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 160 if (!b->singlemalloc) { \ 161 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 162 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 163 } \ 164 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 165 b->singlemalloc = PETSC_TRUE; \ 166 \ 167 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 168 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 169 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 170 b->maxnz += CHUNKSIZE; \ 171 b->reallocs++; \ 172 } \ 173 N = nrow++ - 1; b->nz++; \ 174 /* shift up all the later entries in this row */ \ 175 for (ii=N; ii>=_i; ii--) { \ 176 rp[ii+1] = rp[ii]; \ 177 ap[ii+1] = ap[ii]; \ 178 } \ 179 rp[_i] = col1; \ 180 ap[_i] = value; \ 181 b_noinsert: ; \ 182 bilen[row] = nrow; \ 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatSetValues_MPIAIJ" 187 int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 188 { 189 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 190 PetscScalar value; 191 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 192 int cstart = aij->cstart,cend = aij->cend,row,col; 193 PetscTruth 
roworiented = aij->roworiented; 194 195 /* Some Variables required in the macro */ 196 Mat A = aij->A; 197 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 198 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 199 PetscScalar *aa = a->a; 200 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 201 Mat B = aij->B; 202 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 203 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 204 PetscScalar *ba = b->a; 205 206 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 207 int nonew = a->nonew,shift=0; 208 PetscScalar *ap; 209 210 PetscFunctionBegin; 211 for (i=0; i<m; i++) { 212 if (im[i] < 0) continue; 213 #if defined(PETSC_USE_BOPT_g) 214 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 215 #endif 216 if (im[i] >= rstart && im[i] < rend) { 217 row = im[i] - rstart; 218 for (j=0; j<n; j++) { 219 if (in[j] >= cstart && in[j] < cend){ 220 col = in[j] - cstart; 221 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 222 if (ignorezeroentries && value == 0.0) continue; 223 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 224 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 225 } else if (in[j] < 0) continue; 226 #if defined(PETSC_USE_BOPT_g) 227 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 228 #endif 229 else { 230 if (mat->was_assembled) { 231 if (!aij->colmap) { 232 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 233 } 234 #if defined (PETSC_USE_CTABLE) 235 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 236 col--; 237 #else 238 col = aij->colmap[in[j]] - 1; 239 #endif 240 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 241 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 242 col = in[j]; 243 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 244 B = aij->B; 245 b = (Mat_SeqAIJ*)B->data; 246 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 247 ba = b->a; 248 } 249 } else col = in[j]; 250 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251 if (ignorezeroentries && value == 0.0) continue; 252 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 253 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 254 } 255 } 256 } else { 257 if (!aij->donotstash) { 258 if (roworiented) { 259 if (ignorezeroentries && v[i*n] == 0.0) continue; 260 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 261 } else { 262 if (ignorezeroentries && v[i] == 0.0) continue; 263 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 264 } 265 } 266 } 267 } 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatGetValues_MPIAIJ" 273 int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 274 { 275 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 276 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 277 int cstart = aij->cstart,cend = aij->cend,row,col; 278 279 PetscFunctionBegin; 280 for (i=0; i<m; i++) { 281 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 282 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 283 if (idxm[i] >= rstart && idxm[i] < rend) { 284 row = idxm[i] - rstart; 285 for (j=0; j<n; j++) { 286 if (idxn[j] < 0) 
SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]); 287 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 288 if (idxn[j] >= cstart && idxn[j] < cend){ 289 col = idxn[j] - cstart; 290 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 291 } else { 292 if (!aij->colmap) { 293 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 294 } 295 #if defined (PETSC_USE_CTABLE) 296 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 297 col --; 298 #else 299 col = aij->colmap[idxn[j]] - 1; 300 #endif 301 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 302 else { 303 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 304 } 305 } 306 } 307 } else { 308 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 309 } 310 } 311 PetscFunctionReturn(0); 312 } 313 314 #undef __FUNCT__ 315 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 316 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 317 { 318 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 319 int ierr,nstash,reallocs; 320 InsertMode addv; 321 322 PetscFunctionBegin; 323 if (aij->donotstash) { 324 PetscFunctionReturn(0); 325 } 326 327 /* make sure all processors are either in INSERTMODE or ADDMODE */ 328 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 329 if (addv == (ADD_VALUES|INSERT_VALUES)) { 330 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 331 } 332 mat->insertmode = addv; /* in case this processor had no cache */ 333 334 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 335 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 336 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 337 PetscFunctionReturn(0); 338 } 339 340 341 #undef __FUNCT__ 342 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 343 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 344 { 345 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 346 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 347 int i,j,rstart,ncols,n,ierr,flg; 348 int *row,*col,other_disassembled; 349 PetscScalar *val; 350 InsertMode addv = mat->insertmode; 351 352 PetscFunctionBegin; 353 if (!aij->donotstash) { 354 while (1) { 355 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 356 if (!flg) break; 357 358 for (i=0; i<n;) { 359 /* Now identify the consecutive vals belonging to the same row */ 360 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 361 if (j < n) ncols = j-i; 362 else ncols = n-i; 363 /* Now assemble all these values with a single function call */ 364 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 365 i = j; 366 } 367 } 368 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 369 } 370 371 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 372 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 373 374 /* determine if any processor has disassembled, if so we must 375 also disassemble ourselfs, in order that we may reassemble. 
*/ 376 /* 377 if nonzero structure of submatrix B cannot change then we know that 378 no processor disassembled thus we can skip this stuff 379 */ 380 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 381 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 382 if (mat->was_assembled && !other_disassembled) { 383 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 384 } 385 } 386 387 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 388 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 389 } 390 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 391 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 392 393 if (aij->rowvalues) { 394 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 395 aij->rowvalues = 0; 396 } 397 398 /* used by MatAXPY() */ 399 a->xtoy = 0; b->xtoy = 0; 400 a->XtoY = 0; b->XtoY = 0; 401 402 PetscFunctionReturn(0); 403 } 404 405 #undef __FUNCT__ 406 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 407 int MatZeroEntries_MPIAIJ(Mat A) 408 { 409 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 410 int ierr; 411 412 PetscFunctionBegin; 413 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 414 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 415 PetscFunctionReturn(0); 416 } 417 418 #undef __FUNCT__ 419 #define __FUNCT__ "MatZeroRows_MPIAIJ" 420 int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 421 { 422 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 423 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 424 int *nprocs,j,idx,nsends,row; 425 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 426 int *rvalues,tag = A->tag,count,base,slen,n,*source; 427 int *lens,imdex,*lrows,*values,rstart=l->rstart; 428 MPI_Comm comm = A->comm; 429 MPI_Request *send_waits,*recv_waits; 430 MPI_Status recv_status,*send_status; 431 IS istmp; 432 PetscTruth found; 433 434 PetscFunctionBegin; 435 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 436 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 437 438 /* first count number of contributors to each processor */ 439 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 440 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 441 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 442 for (i=0; i<N; i++) { 443 idx = rows[i]; 444 found = PETSC_FALSE; 445 for (j=0; j<size; j++) { 446 if (idx >= owners[j] && idx < owners[j+1]) { 447 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 448 } 449 } 450 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 451 } 452 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 453 454 /* inform other processors of number of messages and max length*/ 455 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 456 457 /* post receives: */ 458 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 459 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 460 for (i=0; i<nrecvs; i++) { 461 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 462 } 463 464 /* do sends: 465 1) starts[i] gives the starting index in svalues for stuff going to 466 the ith processor 467 */ 468 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 469 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 470 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 471 starts[0] = 0; 472 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 473 for (i=0; i<N; i++) { 474 svalues[starts[owner[i]]++] 
= rows[i]; 475 } 476 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 477 478 starts[0] = 0; 479 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 480 count = 0; 481 for (i=0; i<size; i++) { 482 if (nprocs[2*i+1]) { 483 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 484 } 485 } 486 ierr = PetscFree(starts);CHKERRQ(ierr); 487 488 base = owners[rank]; 489 490 /* wait on receives */ 491 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 492 source = lens + nrecvs; 493 count = nrecvs; slen = 0; 494 while (count) { 495 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 496 /* unpack receives into our local space */ 497 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 498 source[imdex] = recv_status.MPI_SOURCE; 499 lens[imdex] = n; 500 slen += n; 501 count--; 502 } 503 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 504 505 /* move the data into the send scatter */ 506 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 507 count = 0; 508 for (i=0; i<nrecvs; i++) { 509 values = rvalues + i*nmax; 510 for (j=0; j<lens[i]; j++) { 511 lrows[count++] = values[j] - base; 512 } 513 } 514 ierr = PetscFree(rvalues);CHKERRQ(ierr); 515 ierr = PetscFree(lens);CHKERRQ(ierr); 516 ierr = PetscFree(owner);CHKERRQ(ierr); 517 ierr = PetscFree(nprocs);CHKERRQ(ierr); 518 519 /* actually zap the local rows */ 520 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 521 PetscLogObjectParent(A,istmp); 522 523 /* 524 Zero the required rows. If the "diagonal block" of the matrix 525 is square and the user wishes to set the diagonal we use seperate 526 code so that MatSetValues() is not called for each diagonal allocating 527 new memory, thus calling lots of mallocs and slowing things down. 
528 529 Contributed by: Mathew Knepley 530 */ 531 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 532 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 533 if (diag && (l->A->M == l->A->N)) { 534 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 535 } else if (diag) { 536 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 537 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 538 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 539 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 540 } 541 for (i = 0; i < slen; i++) { 542 row = lrows[i] + rstart; 543 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 544 } 545 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 546 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 547 } else { 548 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 549 } 550 ierr = ISDestroy(istmp);CHKERRQ(ierr); 551 ierr = PetscFree(lrows);CHKERRQ(ierr); 552 553 /* wait on sends */ 554 if (nsends) { 555 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 556 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 557 ierr = PetscFree(send_status);CHKERRQ(ierr); 558 } 559 ierr = PetscFree(send_waits);CHKERRQ(ierr); 560 ierr = PetscFree(svalues);CHKERRQ(ierr); 561 562 PetscFunctionReturn(0); 563 } 564 565 #undef __FUNCT__ 566 #define __FUNCT__ "MatMult_MPIAIJ" 567 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 568 { 569 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 570 int ierr,nt; 571 572 PetscFunctionBegin; 573 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 574 if (nt != A->n) { 575 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 576 } 577 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 578 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 579 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 580 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 581 PetscFunctionReturn(0); 582 } 583 584 #undef __FUNCT__ 585 #define __FUNCT__ "MatMultAdd_MPIAIJ" 586 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 587 { 588 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 589 int ierr; 590 591 PetscFunctionBegin; 592 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 593 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 594 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 595 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 596 PetscFunctionReturn(0); 597 } 598 599 #undef __FUNCT__ 600 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 601 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 602 { 603 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 604 int ierr; 605 606 PetscFunctionBegin; 607 /* do nondiagonal part */ 608 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 609 /* send it on its way */ 610 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 611 /* do local part */ 612 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 613 /* receive remote parts: note this assumes the values are not actually */ 614 /* inserted in yy until the next line, which is true for my implementation*/ 615 /* but is not perhaps always true. 
*/ 616 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 617 PetscFunctionReturn(0); 618 } 619 620 EXTERN_C_BEGIN 621 #undef __FUNCT__ 622 #define __FUNCT__ "MatIsSymmetric_MPIAIJ" 623 int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f) 624 { 625 MPI_Comm comm; 626 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 627 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 628 IS Me,Notme; 629 int M,N,first,last,*notme,ntids,i, ierr; 630 631 PetscFunctionBegin; 632 633 /* Easy test: symmetric diagonal block */ 634 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 635 ierr = MatIsSymmetric(Adia,Bdia,f); CHKERRQ(ierr); 636 if (!*f) PetscFunctionReturn(0); 637 ierr = PetscObjectGetComm((PetscObject)Amat,&comm); CHKERRQ(ierr); 638 ierr = MPI_Comm_size(comm,&ntids); CHKERRQ(ierr); 639 if (ntids==1) PetscFunctionReturn(0); 640 641 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 642 ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr); 643 ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr); 644 ierr = PetscMalloc((N-last+first)*sizeof(int),¬me); CHKERRQ(ierr); 645 for (i=0; i<first; i++) notme[i] = i; 646 for (i=last; i<M; i++) notme[i-last+first] = i; 647 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr); 648 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr); 649 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs); CHKERRQ(ierr); 650 Aoff = Aoffs[0]; 651 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs); CHKERRQ(ierr); 652 Boff = Boffs[0]; 653 ierr = MatIsSymmetric(Aoff,Boff,f); CHKERRQ(ierr); 654 ierr = MatDestroyMatrices(1,&Aoffs); CHKERRQ(ierr); 655 ierr = MatDestroyMatrices(1,&Boffs); CHKERRQ(ierr); 656 ierr = ISDestroy(Me); CHKERRQ(ierr); 657 ierr = ISDestroy(Notme); CHKERRQ(ierr); 658 659 PetscFunctionReturn(0); 660 } 661 EXTERN_C_END 662 663 #undef __FUNCT__ 664 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 665 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 666 { 667 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 668 int ierr; 669 670 PetscFunctionBegin; 671 /* do nondiagonal part */ 672 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 673 /* send it on its way */ 674 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 675 /* do local part */ 676 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 677 /* receive remote parts: note this assumes the values are not actually */ 678 /* inserted in yy until the next line, which is true for my implementation*/ 679 /* but is not perhaps always true. 
*/ 680 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 681 PetscFunctionReturn(0); 682 } 683 684 /* 685 This only works correctly for square matrices where the subblock A->A is the 686 diagonal block 687 */ 688 #undef __FUNCT__ 689 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 690 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 691 { 692 int ierr; 693 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 694 695 PetscFunctionBegin; 696 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 697 if (a->rstart != a->cstart || a->rend != a->cend) { 698 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 699 } 700 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 701 PetscFunctionReturn(0); 702 } 703 704 #undef __FUNCT__ 705 #define __FUNCT__ "MatScale_MPIAIJ" 706 int MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 707 { 708 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 709 int ierr; 710 711 PetscFunctionBegin; 712 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 713 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 714 PetscFunctionReturn(0); 715 } 716 717 #undef __FUNCT__ 718 #define __FUNCT__ "MatDestroy_MPIAIJ" 719 int MatDestroy_MPIAIJ(Mat mat) 720 { 721 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 722 int ierr; 723 724 PetscFunctionBegin; 725 #if defined(PETSC_USE_LOG) 726 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 727 #endif 728 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 729 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 730 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 731 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 732 #if defined (PETSC_USE_CTABLE) 733 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 734 #else 735 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 736 #endif 737 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 738 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 739 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 740 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 741 ierr = PetscFree(aij);CHKERRQ(ierr); 742 PetscFunctionReturn(0); 743 } 744 745 #undef __FUNCT__ 746 #define __FUNCT__ "MatView_MPIAIJ_Binary" 747 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 748 { 749 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 750 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 751 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 752 int nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag; 753 int nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 754 PetscScalar *column_values; 755 756 PetscFunctionBegin; 757 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 758 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 759 nz = A->nz + B->nz; 760 if (rank == 0) { 761 header[0] = MAT_FILE_COOKIE; 762 header[1] = mat->M; 763 header[2] = mat->N; 764 ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 765 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 766 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr); 767 /* get largest number of rows any processor has */ 768 rlen = mat->m; 769 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 770 for (i=1; i<size; i++) { 771 rlen = PetscMax(rlen,range[i+1] - range[i]); 772 } 773 } else { 774 ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 775 rlen = mat->m; 776 } 777 778 /* load up the local row counts */ 779 ierr = 
PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr); 780 for (i=0; i<mat->m; i++) { 781 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 782 } 783 784 /* store the row lengths to the file */ 785 if (rank == 0) { 786 MPI_Status status; 787 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr); 788 for (i=1; i<size; i++) { 789 rlen = range[i+1] - range[i]; 790 ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 791 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr); 792 } 793 } else { 794 ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 795 } 796 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 797 798 /* load up the local column indices */ 799 nzmax = nz; /* )th processor needs space a largest processor needs */ 800 ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 801 ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr); 802 cnt = 0; 803 for (i=0; i<mat->m; i++) { 804 for (j=B->i[i]; j<B->i[i+1]; j++) { 805 if ( (col = garray[B->j[j]]) > cstart) break; 806 column_indices[cnt++] = col; 807 } 808 for (k=A->i[i]; k<A->i[i+1]; k++) { 809 column_indices[cnt++] = A->j[k] + cstart; 810 } 811 for (; j<B->i[i+1]; j++) { 812 column_indices[cnt++] = garray[B->j[j]]; 813 } 814 } 815 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 816 817 /* store the column indices to the file */ 818 if (rank == 0) { 819 MPI_Status status; 820 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr); 821 for (i=1; i<size; i++) { 822 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 823 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 824 ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 825 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr); 826 } 827 } else { 828 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 829 ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 830 } 831 ierr = PetscFree(column_indices);CHKERRQ(ierr); 832 833 /* load up the local column values */ 834 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 835 cnt = 0; 836 for (i=0; i<mat->m; i++) { 837 for (j=B->i[i]; j<B->i[i+1]; j++) { 838 if ( garray[B->j[j]] > cstart) break; 839 column_values[cnt++] = B->a[j]; 840 } 841 for (k=A->i[i]; k<A->i[i+1]; k++) { 842 column_values[cnt++] = A->a[k]; 843 } 844 for (; j<B->i[i+1]; j++) { 845 column_values[cnt++] = B->a[j]; 846 } 847 } 848 if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 849 850 /* store the column values to the file */ 851 if (rank == 0) { 852 MPI_Status status; 853 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr); 854 for (i=1; i<size; i++) { 855 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 856 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 857 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 858 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr); 859 } 860 } else { 861 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 862 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 863 } 864 ierr = PetscFree(column_values);CHKERRQ(ierr); 865 
PetscFunctionReturn(0); 866 } 867 868 #undef __FUNCT__ 869 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 870 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 871 { 872 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 873 int ierr,rank = aij->rank,size = aij->size; 874 PetscTruth isdraw,isascii,flg,isbinary; 875 PetscViewer sviewer; 876 PetscViewerFormat format; 877 878 PetscFunctionBegin; 879 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 880 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 881 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 882 if (isascii) { 883 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 884 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 885 MatInfo info; 886 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 887 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 888 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 889 if (flg) { 890 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 891 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 892 } else { 893 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 894 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 895 } 896 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 897 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 898 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 899 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 900 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 901 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 902 PetscFunctionReturn(0); 903 } else if (format == PETSC_VIEWER_ASCII_INFO) { 904 PetscFunctionReturn(0); 905 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 906 PetscFunctionReturn(0); 907 } 908 } else if (isbinary) { 909 if (size == 1) { 910 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 911 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 912 } else { 913 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 914 } 915 PetscFunctionReturn(0); 916 } else if (isdraw) { 917 PetscDraw draw; 918 PetscTruth isnull; 919 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 920 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 921 } 922 923 if (size == 1) { 924 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 925 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 926 } else { 927 /* assemble the entire matrix onto first processor. 
*/ 928 Mat A; 929 Mat_SeqAIJ *Aloc; 930 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 931 PetscScalar *a; 932 933 if (!rank) { 934 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 935 } else { 936 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 937 } 938 PetscLogObjectParent(mat,A); 939 940 /* copy over the A part */ 941 Aloc = (Mat_SeqAIJ*)aij->A->data; 942 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 943 row = aij->rstart; 944 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 945 for (i=0; i<m; i++) { 946 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 947 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 948 } 949 aj = Aloc->j; 950 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 951 952 /* copy over the B part */ 953 Aloc = (Mat_SeqAIJ*)aij->B->data; 954 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 955 row = aij->rstart; 956 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 957 ct = cols; 958 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 959 for (i=0; i<m; i++) { 960 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 961 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 962 } 963 ierr = PetscFree(ct);CHKERRQ(ierr); 964 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 965 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 966 /* 967 Everyone has to call to draw the matrix since the graphics waits are 968 synchronized across all processors that share the PetscDraw object 969 */ 970 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 971 if (!rank) { 972 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 973 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 974 } 975 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 976 ierr = MatDestroy(A);CHKERRQ(ierr); 977 } 978 PetscFunctionReturn(0); 979 } 980 981 #undef __FUNCT__ 982 #define __FUNCT__ "MatView_MPIAIJ" 983 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 984 { 985 int ierr; 986 PetscTruth isascii,isdraw,issocket,isbinary; 987 988 PetscFunctionBegin; 989 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 990 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 991 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 992 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 993 if (isascii || isdraw || isbinary || issocket) { 994 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 995 } else { 996 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 997 } 998 PetscFunctionReturn(0); 999 } 1000 1001 1002 1003 #undef __FUNCT__ 1004 #define __FUNCT__ "MatRelax_MPIAIJ" 1005 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1006 { 1007 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1008 int ierr; 1009 Vec bb1; 1010 PetscScalar mone=-1.0; 1011 1012 PetscFunctionBegin; 1013 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1014 1015 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1016 1017 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1018 if (flag & SOR_ZERO_INITIAL_GUESS) { 1019 ierr = 
(*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1020 its--; 1021 } 1022 1023 while (its--) { 1024 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1025 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1026 1027 /* update rhs: bb1 = bb - B*x */ 1028 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1029 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1030 1031 /* local sweep */ 1032 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1033 CHKERRQ(ierr); 1034 } 1035 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1036 if (flag & SOR_ZERO_INITIAL_GUESS) { 1037 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1038 its--; 1039 } 1040 while (its--) { 1041 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1042 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1043 1044 /* update rhs: bb1 = bb - B*x */ 1045 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1046 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1047 1048 /* local sweep */ 1049 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1050 CHKERRQ(ierr); 1051 } 1052 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1053 if (flag & SOR_ZERO_INITIAL_GUESS) { 1054 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1055 its--; 1056 } 1057 while (its--) { 1058 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1059 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1060 1061 /* update rhs: bb1 = bb - B*x */ 1062 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1063 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1064 1065 /* local sweep */ 1066 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1067 CHKERRQ(ierr); 1068 } 1069 } else { 1070 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1071 } 1072 1073 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1074 PetscFunctionReturn(0); 1075 } 1076 1077 #undef __FUNCT__ 1078 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1079 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1080 { 1081 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1082 Mat A = mat->A,B = mat->B; 1083 int ierr; 1084 PetscReal isend[5],irecv[5]; 1085 1086 PetscFunctionBegin; 1087 info->block_size = 1.0; 1088 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1089 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1090 isend[3] = info->memory; isend[4] = info->mallocs; 1091 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1092 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1093 isend[3] += info->memory; isend[4] += info->mallocs; 1094 if (flag == MAT_LOCAL) { 1095 info->nz_used = isend[0]; 1096 info->nz_allocated = isend[1]; 1097 info->nz_unneeded = isend[2]; 1098 info->memory = isend[3]; 1099 info->mallocs = isend[4]; 1100 } else if (flag == MAT_GLOBAL_MAX) { 1101 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1102 info->nz_used = irecv[0]; 1103 info->nz_allocated = irecv[1]; 1104 info->nz_unneeded = irecv[2]; 1105 info->memory = irecv[3]; 1106 info->mallocs = irecv[4]; 1107 } else if (flag == 
MAT_GLOBAL_SUM) { 1108 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1109 info->nz_used = irecv[0]; 1110 info->nz_allocated = irecv[1]; 1111 info->nz_unneeded = irecv[2]; 1112 info->memory = irecv[3]; 1113 info->mallocs = irecv[4]; 1114 } 1115 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1116 info->fill_ratio_needed = 0; 1117 info->factor_mallocs = 0; 1118 info->rows_global = (double)matin->M; 1119 info->columns_global = (double)matin->N; 1120 info->rows_local = (double)matin->m; 1121 info->columns_local = (double)matin->N; 1122 1123 PetscFunctionReturn(0); 1124 } 1125 1126 #undef __FUNCT__ 1127 #define __FUNCT__ "MatSetOption_MPIAIJ" 1128 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1129 { 1130 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1131 int ierr; 1132 1133 PetscFunctionBegin; 1134 switch (op) { 1135 case MAT_NO_NEW_NONZERO_LOCATIONS: 1136 case MAT_YES_NEW_NONZERO_LOCATIONS: 1137 case MAT_COLUMNS_UNSORTED: 1138 case MAT_COLUMNS_SORTED: 1139 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1140 case MAT_KEEP_ZEROED_ROWS: 1141 case MAT_NEW_NONZERO_LOCATION_ERR: 1142 case MAT_USE_INODES: 1143 case MAT_DO_NOT_USE_INODES: 1144 case MAT_IGNORE_ZERO_ENTRIES: 1145 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1146 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1147 break; 1148 case MAT_ROW_ORIENTED: 1149 a->roworiented = PETSC_TRUE; 1150 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1151 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1152 break; 1153 case MAT_ROWS_SORTED: 1154 case MAT_ROWS_UNSORTED: 1155 case MAT_YES_NEW_DIAGONALS: 1156 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1157 break; 1158 case MAT_COLUMN_ORIENTED: 1159 a->roworiented = PETSC_FALSE; 1160 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1161 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1162 break; 1163 case MAT_IGNORE_OFF_PROC_ENTRIES: 1164 a->donotstash = PETSC_TRUE; 1165 break; 1166 case MAT_NO_NEW_DIAGONALS: 1167 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1168 case MAT_SYMMETRIC: 1169 case MAT_STRUCTURALLY_SYMMETRIC: 1170 break; 1171 default: 1172 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1173 } 1174 PetscFunctionReturn(0); 1175 } 1176 1177 #undef __FUNCT__ 1178 #define __FUNCT__ "MatGetRow_MPIAIJ" 1179 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1180 { 1181 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1182 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1183 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1184 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1185 int *cmap,*idx_p; 1186 1187 PetscFunctionBegin; 1188 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1189 mat->getrowactive = PETSC_TRUE; 1190 1191 if (!mat->rowvalues && (idx || v)) { 1192 /* 1193 allocate enough space to hold information from the longest row. 
1194 */ 1195 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1196 int max = 1,tmp; 1197 for (i=0; i<matin->m; i++) { 1198 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1199 if (max < tmp) { max = tmp; } 1200 } 1201 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1202 mat->rowindices = (int*)(mat->rowvalues + max); 1203 } 1204 1205 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1206 lrow = row - rstart; 1207 1208 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1209 if (!v) {pvA = 0; pvB = 0;} 1210 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1211 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1212 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1213 nztot = nzA + nzB; 1214 1215 cmap = mat->garray; 1216 if (v || idx) { 1217 if (nztot) { 1218 /* Sort by increasing column numbers, assuming A and B already sorted */ 1219 int imark = -1; 1220 if (v) { 1221 *v = v_p = mat->rowvalues; 1222 for (i=0; i<nzB; i++) { 1223 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1224 else break; 1225 } 1226 imark = i; 1227 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1228 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1229 } 1230 if (idx) { 1231 *idx = idx_p = mat->rowindices; 1232 if (imark > -1) { 1233 for (i=0; i<imark; i++) { 1234 idx_p[i] = cmap[cworkB[i]]; 1235 } 1236 } else { 1237 for (i=0; i<nzB; i++) { 1238 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1239 else break; 1240 } 1241 imark = i; 1242 } 1243 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1244 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1245 } 1246 } else { 1247 if (idx) *idx = 0; 1248 if (v) *v = 0; 1249 } 1250 } 1251 *nz = nztot; 1252 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1253 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1254 PetscFunctionReturn(0); 1255 } 1256 1257 #undef __FUNCT__ 1258 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1259 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1260 { 1261 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1262 1263 PetscFunctionBegin; 1264 if (aij->getrowactive == PETSC_FALSE) { 1265 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1266 } 1267 aij->getrowactive = PETSC_FALSE; 1268 PetscFunctionReturn(0); 1269 } 1270 1271 #undef __FUNCT__ 1272 #define __FUNCT__ "MatNorm_MPIAIJ" 1273 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1274 { 1275 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1276 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1277 int ierr,i,j,cstart = aij->cstart; 1278 PetscReal sum = 0.0; 1279 PetscScalar *v; 1280 1281 PetscFunctionBegin; 1282 if (aij->size == 1) { 1283 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1284 } else { 1285 if (type == NORM_FROBENIUS) { 1286 v = amat->a; 1287 for (i=0; i<amat->nz; i++) { 1288 #if defined(PETSC_USE_COMPLEX) 1289 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1290 #else 1291 sum += (*v)*(*v); v++; 1292 #endif 1293 } 1294 v = bmat->a; 1295 for (i=0; i<bmat->nz; i++) { 1296 #if defined(PETSC_USE_COMPLEX) 1297 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1298 #else 1299 sum += (*v)*(*v); v++; 1300 #endif 1301 } 1302 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1303 *norm = sqrt(*norm); 1304 } else if (type == NORM_1) { /* max column norm */ 1305 PetscReal *tmp,*tmp2; 1306 int 
*jj,*garray = aij->garray; 1307 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1308 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1309 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1310 *norm = 0.0; 1311 v = amat->a; jj = amat->j; 1312 for (j=0; j<amat->nz; j++) { 1313 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1314 } 1315 v = bmat->a; jj = bmat->j; 1316 for (j=0; j<bmat->nz; j++) { 1317 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1318 } 1319 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1320 for (j=0; j<mat->N; j++) { 1321 if (tmp2[j] > *norm) *norm = tmp2[j]; 1322 } 1323 ierr = PetscFree(tmp);CHKERRQ(ierr); 1324 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1325 } else if (type == NORM_INFINITY) { /* max row norm */ 1326 PetscReal ntemp = 0.0; 1327 for (j=0; j<aij->A->m; j++) { 1328 v = amat->a + amat->i[j]; 1329 sum = 0.0; 1330 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1331 sum += PetscAbsScalar(*v); v++; 1332 } 1333 v = bmat->a + bmat->i[j]; 1334 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1335 sum += PetscAbsScalar(*v); v++; 1336 } 1337 if (sum > ntemp) ntemp = sum; 1338 } 1339 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1340 } else { 1341 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1342 } 1343 } 1344 PetscFunctionReturn(0); 1345 } 1346 1347 #undef __FUNCT__ 1348 #define __FUNCT__ "MatTranspose_MPIAIJ" 1349 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1350 { 1351 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1352 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1353 int ierr; 1354 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1355 Mat B; 1356 PetscScalar *array; 1357 1358 PetscFunctionBegin; 1359 if (!matout && M != N) { 1360 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1361 } 1362 1363 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1364 1365 /* copy over the A part */ 1366 Aloc = (Mat_SeqAIJ*)a->A->data; 1367 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1368 row = a->rstart; 1369 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1370 for (i=0; i<m; i++) { 1371 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1372 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1373 } 1374 aj = Aloc->j; 1375 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1376 1377 /* copy over the B part */ 1378 Aloc = (Mat_SeqAIJ*)a->B->data; 1379 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1380 row = a->rstart; 1381 ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr); 1382 ct = cols; 1383 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1384 for (i=0; i<m; i++) { 1385 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1386 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1387 } 1388 ierr = PetscFree(ct);CHKERRQ(ierr); 1389 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1390 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1391 if (matout) { 1392 *matout = B; 1393 } else { 1394 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1395 } 1396 PetscFunctionReturn(0); 1397 } 1398 1399 #undef __FUNCT__ 1400 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1401 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1402 { 1403 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1404 Mat a = aij->A,b = aij->B; 1405 int ierr,s1,s2,s3; 1406 1407 PetscFunctionBegin; 1408 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1409 if 
(rr) { 1410 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1411 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1412 /* Overlap communication with computation. */ 1413 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1414 } 1415 if (ll) { 1416 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1417 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1418 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1419 } 1420 /* scale the diagonal block */ 1421 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1422 1423 if (rr) { 1424 /* Do a scatter end and then right scale the off-diagonal block */ 1425 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1426 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1427 } 1428 1429 PetscFunctionReturn(0); 1430 } 1431 1432 1433 #undef __FUNCT__ 1434 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1435 int MatPrintHelp_MPIAIJ(Mat A) 1436 { 1437 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1438 int ierr; 1439 1440 PetscFunctionBegin; 1441 if (!a->rank) { 1442 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1443 } 1444 PetscFunctionReturn(0); 1445 } 1446 1447 #undef __FUNCT__ 1448 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1449 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1450 { 1451 PetscFunctionBegin; 1452 *bs = 1; 1453 PetscFunctionReturn(0); 1454 } 1455 #undef __FUNCT__ 1456 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1457 int MatSetUnfactored_MPIAIJ(Mat A) 1458 { 1459 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1460 int ierr; 1461 1462 PetscFunctionBegin; 1463 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1464 PetscFunctionReturn(0); 1465 } 1466 1467 #undef __FUNCT__ 1468 #define __FUNCT__ "MatEqual_MPIAIJ" 1469 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1470 { 1471 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1472 Mat a,b,c,d; 1473 PetscTruth flg; 1474 int ierr; 1475 1476 PetscFunctionBegin; 1477 a = matA->A; b = matA->B; 1478 c = matB->A; d = matB->B; 1479 1480 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1481 if (flg == PETSC_TRUE) { 1482 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1483 } 1484 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1485 PetscFunctionReturn(0); 1486 } 1487 1488 #undef __FUNCT__ 1489 #define __FUNCT__ "MatCopy_MPIAIJ" 1490 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1491 { 1492 int ierr; 1493 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1494 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1495 1496 PetscFunctionBegin; 1497 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1498 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1499 /* because of the column compression in the off-processor part of the matrix a->B, 1500 the number of columns in a->B and b->B may be different, hence we cannot call 1501 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1502 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1503 then copying the submatrices */ 1504 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1505 } else { 1506 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1507 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1508 } 1509 PetscFunctionReturn(0); 1510 } 1511 1512 #undef __FUNCT__ 1513 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1514 int MatSetUpPreallocation_MPIAIJ(Mat A) 1515 { 1516 int ierr; 1517 1518 PetscFunctionBegin; 1519 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1520 PetscFunctionReturn(0); 1521 } 1522 1523 #include "petscblaslapack.h" 1524 #undef __FUNCT__ 1525 #define __FUNCT__ "MatAXPY_MPIAIJ" 1526 int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1527 { 1528 int ierr,one=1,i; 1529 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1530 Mat_SeqAIJ *x,*y; 1531 1532 PetscFunctionBegin; 1533 if (str == SAME_NONZERO_PATTERN) { 1534 x = (Mat_SeqAIJ *)xx->A->data; 1535 y = (Mat_SeqAIJ *)yy->A->data; 1536 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1537 x = (Mat_SeqAIJ *)xx->B->data; 1538 y = (Mat_SeqAIJ *)yy->B->data; 1539 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1540 } else if (str == SUBSET_NONZERO_PATTERN) { 1541 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1542 1543 x = (Mat_SeqAIJ *)xx->B->data; 1544 y = (Mat_SeqAIJ *)yy->B->data; 1545 if (y->xtoy && y->XtoY != xx->B) { 1546 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1547 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1548 } 1549 if (!y->xtoy) { /* get xtoy */ 1550 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1551 y->XtoY = xx->B; 1552 } 1553 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1554 } else { 1555 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1556 } 1557 PetscFunctionReturn(0); 1558 } 1559 1560 /* -------------------------------------------------------------------*/ 1561 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1562 MatGetRow_MPIAIJ, 1563 MatRestoreRow_MPIAIJ, 1564 MatMult_MPIAIJ, 1565 /* 4*/ MatMultAdd_MPIAIJ, 1566 MatMultTranspose_MPIAIJ, 1567 MatMultTransposeAdd_MPIAIJ, 1568 0, 1569 0, 1570 0, 1571 /*10*/ 0, 1572 0, 1573 0, 1574 MatRelax_MPIAIJ, 1575 MatTranspose_MPIAIJ, 1576 /*15*/ MatGetInfo_MPIAIJ, 1577 MatEqual_MPIAIJ, 1578 MatGetDiagonal_MPIAIJ, 1579 MatDiagonalScale_MPIAIJ, 1580 MatNorm_MPIAIJ, 1581 /*20*/ MatAssemblyBegin_MPIAIJ, 1582 MatAssemblyEnd_MPIAIJ, 1583 0, 1584 MatSetOption_MPIAIJ, 1585 MatZeroEntries_MPIAIJ, 1586 /*25*/ MatZeroRows_MPIAIJ, 1587 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1588 MatLUFactorSymbolic_MPIAIJ_TFS, 1589 #else 1590 0, 1591 #endif 1592 0, 1593 0, 1594 0, 1595 /*30*/ MatSetUpPreallocation_MPIAIJ, 1596 0, 1597 0, 1598 0, 1599 0, 1600 /*35*/ MatDuplicate_MPIAIJ, 1601 0, 1602 0, 1603 0, 1604 0, 1605 /*40*/ MatAXPY_MPIAIJ, 1606 MatGetSubMatrices_MPIAIJ, 1607 MatIncreaseOverlap_MPIAIJ, 1608 MatGetValues_MPIAIJ, 1609 MatCopy_MPIAIJ, 1610 /*45*/ MatPrintHelp_MPIAIJ, 1611 MatScale_MPIAIJ, 1612 0, 1613 0, 1614 0, 1615 /*50*/ MatGetBlockSize_MPIAIJ, 1616 0, 1617 0, 1618 0, 1619 0, 1620 /*55*/ MatFDColoringCreate_MPIAIJ, 1621 0, 1622 MatSetUnfactored_MPIAIJ, 1623 0, 1624 0, 1625 /*60*/ MatGetSubMatrix_MPIAIJ, 1626 MatDestroy_MPIAIJ, 1627 MatView_MPIAIJ, 1628 MatGetPetscMaps_Petsc, 1629 0, 1630 /*65*/ 0, 1631 0, 1632 0, 1633 0, 1634 0, 1635 /*70*/ 0, 1636 0, 1637 
MatSetColoring_MPIAIJ, 1638 MatSetValuesAdic_MPIAIJ, 1639 MatSetValuesAdifor_MPIAIJ, 1640 /*75*/ 0, 1641 0, 1642 0, 1643 0, 1644 0, 1645 /*80*/ 0, 1646 0, 1647 0, 1648 0, 1649 0, 1650 /*85*/ MatLoad_MPIAIJ}; 1651 1652 /* ----------------------------------------------------------------------------------------*/ 1653 1654 EXTERN_C_BEGIN 1655 #undef __FUNCT__ 1656 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1657 int MatStoreValues_MPIAIJ(Mat mat) 1658 { 1659 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1660 int ierr; 1661 1662 PetscFunctionBegin; 1663 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1664 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1665 PetscFunctionReturn(0); 1666 } 1667 EXTERN_C_END 1668 1669 EXTERN_C_BEGIN 1670 #undef __FUNCT__ 1671 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1672 int MatRetrieveValues_MPIAIJ(Mat mat) 1673 { 1674 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1675 int ierr; 1676 1677 PetscFunctionBegin; 1678 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1679 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1680 PetscFunctionReturn(0); 1681 } 1682 EXTERN_C_END 1683 1684 #include "petscpc.h" 1685 EXTERN_C_BEGIN 1686 #undef __FUNCT__ 1687 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1688 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 1689 { 1690 Mat_MPIAIJ *b; 1691 int ierr,i; 1692 1693 PetscFunctionBegin; 1694 B->preallocated = PETSC_TRUE; 1695 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1696 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1697 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1698 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1699 if (d_nnz) { 1700 for (i=0; i<B->m; i++) { 1701 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 1702 } 1703 } 1704 if (o_nnz) { 1705 for (i=0; i<B->m; i++) { 1706 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 1707 } 1708 } 1709 b = (Mat_MPIAIJ*)B->data; 1710 1711 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1712 PetscLogObjectParent(B,b->A); 1713 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1714 PetscLogObjectParent(B,b->B); 1715 1716 PetscFunctionReturn(0); 1717 } 1718 EXTERN_C_END 1719 1720 /*MC 1721 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 1722 1723 Options Database Keys: 1724 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 1725 1726 Level: beginner 1727 1728 .seealso: MatCreateMPIAIJ 1729 M*/ 1730 1731 EXTERN_C_BEGIN 1732 #undef __FUNCT__ 1733 #define __FUNCT__ "MatCreate_MPIAIJ" 1734 int MatCreate_MPIAIJ(Mat B) 1735 { 1736 Mat_MPIAIJ *b; 1737 int ierr,i,size; 1738 1739 PetscFunctionBegin; 1740 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1741 1742 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1743 B->data = (void*)b; 1744 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1745 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1746 B->factor = 0; 1747 B->assembled = PETSC_FALSE; 1748 B->mapping = 0; 1749 1750 B->insertmode = NOT_SET_VALUES; 1751 b->size = size; 1752 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1753 1754 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1755 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1756 1757 /* the information in the maps duplicates the information computed below, eventually 1758 we should remove the duplicate information that is not contained in the maps */ 1759 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1760 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1761 1762 /* build local table of row and column ownerships */ 1763 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1764 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1765 b->cowners = b->rowners + b->size + 2; 1766 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1767 b->rowners[0] = 0; 1768 for (i=2; i<=b->size; i++) { 1769 b->rowners[i] += b->rowners[i-1]; 1770 } 1771 b->rstart = b->rowners[b->rank]; 1772 b->rend = b->rowners[b->rank+1]; 1773 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1774 b->cowners[0] = 0; 1775 for (i=2; i<=b->size; i++) { 1776 b->cowners[i] += b->cowners[i-1]; 1777 } 1778 b->cstart = b->cowners[b->rank]; 1779 b->cend = b->cowners[b->rank+1]; 1780 1781 /* build cache for off array entries formed */ 1782 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1783 b->donotstash = PETSC_FALSE; 1784 b->colmap = 0; 1785 b->garray = 0; 1786 b->roworiented = PETSC_TRUE; 1787 1788 /* stuff used for matrix vector multiply */ 1789 b->lvec = PETSC_NULL; 1790 b->Mvctx = PETSC_NULL; 1791 1792 /* stuff for MatGetRow() */ 1793 b->rowindices = 0; 1794 b->rowvalues = 0; 1795 b->getrowactive = PETSC_FALSE; 1796 1797 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1798 "MatStoreValues_MPIAIJ", 1799 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1800 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1801 "MatRetrieveValues_MPIAIJ", 1802 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1803 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1804 "MatGetDiagonalBlock_MPIAIJ", 1805 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1806 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C", 1807 "MatIsSymmetric_MPIAIJ", 1808 MatIsSymmetric_MPIAIJ); CHKERRQ(ierr); 1809 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 1810 "MatMPIAIJSetPreallocation_MPIAIJ", 1811 MatMPIAIJSetPreallocation_MPIAIJ); CHKERRQ(ierr); 1812 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 1813 "MatDiagonalScaleLocal_MPIAIJ", 
1814 MatDiagonalScaleLocal_MPIAIJ); CHKERRQ(ierr); 1815 PetscFunctionReturn(0); 1816 } 1817 EXTERN_C_END 1818 1819 /*MC 1820 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 1821 1822 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 1823 and MATMPIAIJ otherwise. 1824 1825 Options Database Keys: 1826 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 1827 1828 Level: beginner 1829 1830 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ 1831 M*/ 1832 1833 EXTERN_C_BEGIN 1834 #undef __FUNCT__ 1835 #define __FUNCT__ "MatCreate_AIJ" 1836 int MatCreate_AIJ(Mat A) { 1837 int ierr,size; 1838 1839 PetscFunctionBegin; 1840 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr); 1841 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1842 if (size == 1) { 1843 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1844 } else { 1845 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1846 } 1847 PetscFunctionReturn(0); 1848 } 1849 EXTERN_C_END 1850 1851 #undef __FUNCT__ 1852 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1853 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1854 { 1855 Mat mat; 1856 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1857 int ierr; 1858 1859 PetscFunctionBegin; 1860 *newmat = 0; 1861 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1862 ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1863 a = (Mat_MPIAIJ*)mat->data; 1864 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1865 mat->factor = matin->factor; 1866 mat->assembled = PETSC_TRUE; 1867 mat->insertmode = NOT_SET_VALUES; 1868 mat->preallocated = PETSC_TRUE; 1869 1870 a->rstart = oldmat->rstart; 1871 a->rend = oldmat->rend; 1872 a->cstart = oldmat->cstart; 1873 a->cend = oldmat->cend; 1874 a->size = oldmat->size; 1875 a->rank = oldmat->rank; 1876 a->donotstash = oldmat->donotstash; 1877 a->roworiented = oldmat->roworiented; 1878 a->rowindices = 0; 1879 a->rowvalues = 0; 1880 a->getrowactive = PETSC_FALSE; 1881 1882 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1883 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1884 if (oldmat->colmap) { 1885 #if defined (PETSC_USE_CTABLE) 1886 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1887 #else 1888 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1889 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1890 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1891 #endif 1892 } else a->colmap = 0; 1893 if (oldmat->garray) { 1894 int len; 1895 len = oldmat->B->n; 1896 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1897 PetscLogObjectMemory(mat,len*sizeof(int)); 1898 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1899 } else a->garray = 0; 1900 1901 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1902 PetscLogObjectParent(mat,a->lvec); 1903 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1904 PetscLogObjectParent(mat,a->Mvctx); 1905 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1906 PetscLogObjectParent(mat,a->A); 1907 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1908 PetscLogObjectParent(mat,a->B); 1909 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1910 *newmat = mat; 1911 PetscFunctionReturn(0); 1912 } 1913 1914 #include 
"petscsys.h" 1915 1916 #undef __FUNCT__ 1917 #define __FUNCT__ "MatLoad_MPIAIJ" 1918 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat) 1919 { 1920 Mat A; 1921 PetscScalar *vals,*svals; 1922 MPI_Comm comm = ((PetscObject)viewer)->comm; 1923 MPI_Status status; 1924 int i,nz,ierr,j,rstart,rend,fd; 1925 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1926 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1927 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1928 1929 PetscFunctionBegin; 1930 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1931 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1932 if (!rank) { 1933 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1934 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1935 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1936 if (header[3] < 0) { 1937 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1938 } 1939 } 1940 1941 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1942 M = header[1]; N = header[2]; 1943 /* determine ownership of all rows */ 1944 m = M/size + ((M % size) > rank); 1945 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1946 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1947 rowners[0] = 0; 1948 for (i=2; i<=size; i++) { 1949 rowners[i] += rowners[i-1]; 1950 } 1951 rstart = rowners[rank]; 1952 rend = rowners[rank+1]; 1953 1954 /* distribute row lengths to all processors */ 1955 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1956 offlens = ourlens + (rend-rstart); 1957 if (!rank) { 1958 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1959 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1960 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1961 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1962 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1963 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1964 } else { 1965 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1966 } 1967 1968 if (!rank) { 1969 /* calculate the number of nonzeros on each processor */ 1970 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1971 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1972 for (i=0; i<size; i++) { 1973 for (j=rowners[i]; j< rowners[i+1]; j++) { 1974 procsnz[i] += rowlengths[j]; 1975 } 1976 } 1977 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1978 1979 /* determine max buffer needed and allocate it */ 1980 maxnz = 0; 1981 for (i=0; i<size; i++) { 1982 maxnz = PetscMax(maxnz,procsnz[i]); 1983 } 1984 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1985 1986 /* read in my part of the matrix column indices */ 1987 nz = procsnz[0]; 1988 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1989 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1990 1991 /* read in every one elses and ship off */ 1992 for (i=1; i<size; i++) { 1993 nz = procsnz[i]; 1994 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1995 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1996 } 1997 ierr = PetscFree(cols);CHKERRQ(ierr); 1998 } else { 1999 /* determine buffer space needed for message */ 2000 nz = 0; 2001 for (i=0; i<m; i++) { 2002 nz += ourlens[i]; 2003 } 2004 ierr = 
PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 2005 2006 /* receive message of column indices*/ 2007 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2008 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2009 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2010 } 2011 2012 /* determine column ownership if matrix is not square */ 2013 if (N != M) { 2014 n = N/size + ((N % size) > rank); 2015 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2016 cstart = cend - n; 2017 } else { 2018 cstart = rstart; 2019 cend = rend; 2020 n = cend - cstart; 2021 } 2022 2023 /* loop over local rows, determining number of off diagonal entries */ 2024 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 2025 jj = 0; 2026 for (i=0; i<m; i++) { 2027 for (j=0; j<ourlens[i]; j++) { 2028 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2029 jj++; 2030 } 2031 } 2032 2033 /* create our matrix */ 2034 for (i=0; i<m; i++) { 2035 ourlens[i] -= offlens[i]; 2036 } 2037 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2038 ierr = MatSetType(A,type);CHKERRQ(ierr); 2039 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2040 2041 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2042 for (i=0; i<m; i++) { 2043 ourlens[i] += offlens[i]; 2044 } 2045 2046 if (!rank) { 2047 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2048 2049 /* read in my part of the matrix numerical values */ 2050 nz = procsnz[0]; 2051 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2052 2053 /* insert into matrix */ 2054 jj = rstart; 2055 smycols = mycols; 2056 svals = vals; 2057 for (i=0; i<m; i++) { 2058 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2059 smycols += ourlens[i]; 2060 svals += ourlens[i]; 2061 jj++; 2062 } 2063 2064 /* read in other processors and ship out */ 2065 for (i=1; i<size; i++) { 2066 nz = procsnz[i]; 2067 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2068 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2069 } 2070 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2071 } else { 2072 /* receive numeric values */ 2073 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2074 2075 /* receive message of values*/ 2076 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2077 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2078 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2079 2080 /* insert into matrix */ 2081 jj = rstart; 2082 smycols = mycols; 2083 svals = vals; 2084 for (i=0; i<m; i++) { 2085 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2086 smycols += ourlens[i]; 2087 svals += ourlens[i]; 2088 jj++; 2089 } 2090 } 2091 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2092 ierr = PetscFree(vals);CHKERRQ(ierr); 2093 ierr = PetscFree(mycols);CHKERRQ(ierr); 2094 ierr = PetscFree(rowners);CHKERRQ(ierr); 2095 2096 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2097 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2098 *newmat = A; 2099 PetscFunctionReturn(0); 2100 } 2101 2102 #undef __FUNCT__ 2103 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2104 /* 2105 Not great since it makes two copies of the submatrix, first an SeqAIJ 2106 in local and then by concatenating the local matrices the end result. 
2107 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2108 */ 2109 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2110 { 2111 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2112 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2113 Mat *local,M,Mreuse; 2114 PetscScalar *vwork,*aa; 2115 MPI_Comm comm = mat->comm; 2116 Mat_SeqAIJ *aij; 2117 2118 2119 PetscFunctionBegin; 2120 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2121 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2122 2123 if (call == MAT_REUSE_MATRIX) { 2124 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2125 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 2126 local = &Mreuse; 2127 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2128 } else { 2129 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2130 Mreuse = *local; 2131 ierr = PetscFree(local);CHKERRQ(ierr); 2132 } 2133 2134 /* 2135 m - number of local rows 2136 n - number of columns (same on all processors) 2137 rstart - first row in new global matrix generated 2138 */ 2139 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2140 if (call == MAT_INITIAL_MATRIX) { 2141 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2142 ii = aij->i; 2143 jj = aij->j; 2144 2145 /* 2146 Determine the number of non-zeros in the diagonal and off-diagonal 2147 portions of the matrix in order to do correct preallocation 2148 */ 2149 2150 /* first get start and end of "diagonal" columns */ 2151 if (csize == PETSC_DECIDE) { 2152 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2153 if (mglobal == n) { /* square matrix */ 2154 nlocal = m; 2155 } else { 2156 nlocal = n/size + ((n % size) > rank); 2157 } 2158 } else { 2159 nlocal = csize; 2160 } 2161 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2162 rstart = rend - nlocal; 2163 if (rank == size - 1 && rend != n) { 2164 SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n); 2165 } 2166 2167 /* next, compute all the lengths */ 2168 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2169 olens = dlens + m; 2170 for (i=0; i<m; i++) { 2171 jend = ii[i+1] - ii[i]; 2172 olen = 0; 2173 dlen = 0; 2174 for (j=0; j<jend; j++) { 2175 if (*jj < rstart || *jj >= rend) olen++; 2176 else dlen++; 2177 jj++; 2178 } 2179 olens[i] = olen; 2180 dlens[i] = dlen; 2181 } 2182 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2183 ierr = PetscFree(dlens);CHKERRQ(ierr); 2184 } else { 2185 int ml,nl; 2186 2187 M = *newmat; 2188 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2189 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2190 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2191 /* 2192 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2193 rather than the slower MatSetValues(). 
2194 */ 2195 M->was_assembled = PETSC_TRUE; 2196 M->assembled = PETSC_FALSE; 2197 } 2198 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2199 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2200 ii = aij->i; 2201 jj = aij->j; 2202 aa = aij->a; 2203 for (i=0; i<m; i++) { 2204 row = rstart + i; 2205 nz = ii[i+1] - ii[i]; 2206 cwork = jj; jj += nz; 2207 vwork = aa; aa += nz; 2208 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2209 } 2210 2211 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2212 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2213 *newmat = M; 2214 2215 /* save submatrix used in processor for next request */ 2216 if (call == MAT_INITIAL_MATRIX) { 2217 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2218 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2219 } 2220 2221 PetscFunctionReturn(0); 2222 } 2223 2224 #undef __FUNCT__ 2225 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2226 /*@C 2227 MatMPIAIJSetPreallocation - Preallocates storage for a sparse parallel matrix in AIJ format 2228 (the default parallel PETSc format). For good matrix assembly performance 2229 the user should preallocate the matrix storage by setting the parameters 2230 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2231 performance can be increased by more than a factor of 50. 2232 2233 Collective on MPI_Comm 2234 2235 Input Parameters: 2236 + A - the matrix 2237 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2238 (same value is used for all local rows) 2239 . d_nnz - array containing the number of nonzeros in the various rows of the 2240 DIAGONAL portion of the local submatrix (possibly different for each row) 2241 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2242 The size of this array is equal to the number of local rows, i.e. 'm'. 2243 You must leave room for the diagonal entry even if it is zero. 2244 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2245 submatrix (same value is used for all local rows). 2246 - o_nnz - array containing the number of nonzeros in the various rows of the 2247 OFF-DIAGONAL portion of the local submatrix (possibly different for 2248 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2249 structure. The size of this array is equal to the number 2250 of local rows, i.e. 'm'. 2251 2252 The AIJ format (also called the Yale sparse matrix format or 2253 compressed row storage) is fully compatible with standard Fortran 77 2254 storage. That is, the stored row and column indices can begin at 2255 either one (as in Fortran) or zero. See the users manual for details. 2256 2257 The user MUST specify either the local or global matrix dimensions 2258 (possibly both). 2259 2260 The parallel matrix is partitioned such that the first m0 rows belong to 2261 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2262 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 2263 2264 The DIAGONAL portion of the local submatrix of a processor can be defined 2265 as the submatrix which is obtained by extracting the part corresponding 2266 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2267 first row that belongs to the processor, and r2 is the last row belonging 2268 to this processor. This is a square mxm matrix. The remaining portion 2269 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
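   Once the matrix exists, the row range owned by the calling process (and hence,
   for the square diagonal block described above, its diagonal column range) can be
   queried with MatGetOwnershipRange(). The following sketch (illustrative only,
   error checking omitted) shows the test that decides in which block an entry falls:

.vb
      int rstart,rend;
      MatGetOwnershipRange(A,&rstart,&rend);
      /* an entry in a locally owned row with global column j lies in the DIAGONAL
         block if rstart <= j < rend, and in the OFF-DIAGONAL block otherwise */
.ve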
2270 2271 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2272 2273 By default, this format uses inodes (identical nodes) when possible. 2274 We search for consecutive rows with the same nonzero structure, thereby 2275 reusing matrix information to achieve increased efficiency. 2276 2277 Options Database Keys: 2278 + -mat_aij_no_inode - Do not use inodes 2279 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2280 - -mat_aij_oneindex - Internally use indexing starting at 1 2281 rather than 0. Note that when calling MatSetValues(), 2282 the user still MUST index entries starting at 0! 2283 2284 Example usage: 2285 2286 Consider the following 8x8 matrix with 34 non-zero values, that is 2287 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2288 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2289 as follows: 2290 2291 .vb 2292 1 2 0 | 0 3 0 | 0 4 2293 Proc0 0 5 6 | 7 0 0 | 8 0 2294 9 0 10 | 11 0 0 | 12 0 2295 ------------------------------------- 2296 13 0 14 | 15 16 17 | 0 0 2297 Proc1 0 18 0 | 19 20 21 | 0 0 2298 0 0 0 | 22 23 0 | 24 0 2299 ------------------------------------- 2300 Proc2 25 26 27 | 0 0 28 | 29 0 2301 30 0 0 | 31 32 33 | 0 34 2302 .ve 2303 2304 This can be represented as a collection of submatrices as: 2305 2306 .vb 2307 A B C 2308 D E F 2309 G H I 2310 .ve 2311 2312 Where the submatrices A,B,C are owned by proc0, D,E,F are 2313 owned by proc1, G,H,I are owned by proc2. 2314 2315 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2316 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2317 The 'M','N' parameters are 8,8, and have the same values on all procs. 2318 2319 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2320 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2321 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2322 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2323 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ 2324 matrix, and [DF] as another SeqAIJ matrix. 2325 2326 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2327 allocated for every row of the local diagonal submatrix, and o_nz 2328 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2329 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2330 local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 2331 In this case, the values of d_nz,o_nz are: 2332 .vb 2333 proc0 : d_nz = 2, o_nz = 2 2334 proc1 : d_nz = 3, o_nz = 2 2335 proc2 : d_nz = 1, o_nz = 4 2336 .ve 2337 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2338 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2339 for proc2, i.e., we are using 12+15+10=37 storage locations to store 2340 34 values. 2341 2342 When the d_nnz, o_nnz parameters are specified, the storage is specified 2343 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2344 In the above case the values for d_nnz,o_nnz are: 2345 .vb 2346 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2347 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2348 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2349 .ve 2350 Here the space allocated is the sum of all of the above values, i.e., 34, and 2351 hence pre-allocation is perfect.
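   For illustration, the preallocation calls matching the example above might look as
   follows on proc1 (a sketch only; the variable names are hypothetical and error
   checking is omitted):

.vb
      Mat A;
      int d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};

      MatCreate(comm,3,3,8,8,&A);
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve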
2352 2353 Level: intermediate 2354 2355 .keywords: matrix, aij, compressed row, sparse, parallel 2356 2357 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2358 @*/ 2359 int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 2360 { 2361 int ierr,(*f)(Mat,int,const int[],int,const int[]); 2362 2363 PetscFunctionBegin; 2364 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2365 if (f) { 2366 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2367 } 2368 PetscFunctionReturn(0); 2369 } 2370 2371 #undef __FUNCT__ 2372 #define __FUNCT__ "MatCreateMPIAIJ" 2373 /*@C 2374 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2375 (the default parallel PETSc format). For good matrix assembly performance 2376 the user should preallocate the matrix storage by setting the parameters 2377 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2378 performance can be increased by more than a factor of 50. 2379 2380 Collective on MPI_Comm 2381 2382 Input Parameters: 2383 + comm - MPI communicator 2384 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2385 This value should be the same as the local size used in creating the 2386 y vector for the matrix-vector product y = Ax. 2387 . n - This value should be the same as the local size used in creating the 2388 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2389 calculated if N is given) For square matrices n is almost always m. 2390 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2391 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2392 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2393 (same value is used for all local rows) 2394 . d_nnz - array containing the number of nonzeros in the various rows of the 2395 DIAGONAL portion of the local submatrix (possibly different for each row) 2396 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2397 The size of this array is equal to the number of local rows, i.e 'm'. 2398 You must leave room for the diagonal entry even if it is zero. 2399 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2400 submatrix (same value is used for all local rows). 2401 - o_nnz - array containing the number of nonzeros in the various rows of the 2402 OFF-DIAGONAL portion of the local submatrix (possibly different for 2403 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2404 structure. The size of this array is equal to the number 2405 of local rows, i.e 'm'. 2406 2407 Output Parameter: 2408 . A - the matrix 2409 2410 Notes: 2411 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2412 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2413 storage requirements for this matrix. 2414 2415 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2416 processor than it must be used on all processors that share the object for 2417 that argument. 2418 2419 The AIJ format (also called the Yale sparse matrix format or 2420 compressed row storage), is fully compatible with standard Fortran 77 2421 storage. That is, the stored row and column indices can begin at 2422 either one (as in Fortran) or zero. See the users manual for details. 2423 2424 The user MUST specify either the local or global matrix dimensions 2425 (possibly both). 
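   For instance, either of the following is a legal way to set the sizes when creating
   the matrix (a sketch only; the preallocation arguments are abbreviated and error
   checking is omitted):

.vb
      /* give the local sizes and have PETSc determine the global sizes */
      MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);
      /* or give the global sizes and have PETSc determine the local sizes */
      MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);
.ve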
2426 2427 The parallel matrix is partitioned such that the first m0 rows belong to 2428 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2429 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 2430 2431 The DIAGONAL portion of the local submatrix of a processor can be defined 2432 as the submatrix which is obtained by extracting the part corresponding 2433 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2434 first row that belongs to the processor, and r2 is the last row belonging 2435 to this processor. This is a square mxm matrix. The remaining portion 2436 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2437 2438 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2439 2440 When calling this routine with a single process communicator, a matrix of 2441 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2442 type of communicator, use the construction mechanism: 2443 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...); 2444 2445 By default, this format uses inodes (identical nodes) when possible. 2446 We search for consecutive rows with the same nonzero structure, thereby 2447 reusing matrix information to achieve increased efficiency. 2448 2449 Options Database Keys: 2450 + -mat_aij_no_inode - Do not use inodes 2451 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2452 - -mat_aij_oneindex - Internally use indexing starting at 1 2453 rather than 0. Note that when calling MatSetValues(), 2454 the user still MUST index entries starting at 0! 2455 2456 2457 Example usage: 2458 2459 Consider the following 8x8 matrix with 34 non-zero values, that is 2460 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2461 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2462 as follows: 2463 2464 .vb 2465 1 2 0 | 0 3 0 | 0 4 2466 Proc0 0 5 6 | 7 0 0 | 8 0 2467 9 0 10 | 11 0 0 | 12 0 2468 ------------------------------------- 2469 13 0 14 | 15 16 17 | 0 0 2470 Proc1 0 18 0 | 19 20 21 | 0 0 2471 0 0 0 | 22 23 0 | 24 0 2472 ------------------------------------- 2473 Proc2 25 26 27 | 0 0 28 | 29 0 2474 30 0 0 | 31 32 33 | 0 34 2475 .ve 2476 2477 This can be represented as a collection of submatrices as: 2478 2479 .vb 2480 A B C 2481 D E F 2482 G H I 2483 .ve 2484 2485 Where the submatrices A,B,C are owned by proc0, D,E,F are 2486 owned by proc1, G,H,I are owned by proc2. 2487 2488 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2489 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2490 The 'M','N' parameters are 8,8, and have the same values on all procs. 2491 2492 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2493 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2494 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2495 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2496 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ 2497 matrix, and [DF] as another SeqAIJ matrix. 2498 2499 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2500 allocated for every row of the local diagonal submatrix, and o_nz 2501 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2502 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2503 local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2504 In this case, the values of d_nz,o_nz are: 2505 .vb 2506 proc0 : dnz = 2, o_nz = 2 2507 proc1 : dnz = 3, o_nz = 2 2508 proc2 : dnz = 1, o_nz = 4 2509 .ve 2510 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2511 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2512 for proc3. i.e we are using 12+15+10=37 storage locations to store 2513 34 values. 2514 2515 When d_nnz, o_nnz parameters are specified, the storage is specified 2516 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2517 In the above case the values for d_nnz,o_nnz are: 2518 .vb 2519 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2520 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2521 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2522 .ve 2523 Here the space allocated is sum of all the above values i.e 34, and 2524 hence pre-allocation is perfect. 2525 2526 Level: intermediate 2527 2528 .keywords: matrix, aij, compressed row, sparse, parallel 2529 2530 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2531 @*/ 2532 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 2533 { 2534 int ierr,size; 2535 2536 PetscFunctionBegin; 2537 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2538 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2539 if (size > 1) { 2540 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2541 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2542 } else { 2543 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2544 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2545 } 2546 PetscFunctionReturn(0); 2547 } 2548 2549 #undef __FUNCT__ 2550 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2551 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[]) 2552 { 2553 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2554 PetscFunctionBegin; 2555 *Ad = a->A; 2556 *Ao = a->B; 2557 *colmap = a->garray; 2558 PetscFunctionReturn(0); 2559 } 2560 2561 #undef __FUNCT__ 2562 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2563 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2564 { 2565 int ierr,i; 2566 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2567 2568 PetscFunctionBegin; 2569 if (coloring->ctype == IS_COLORING_LOCAL) { 2570 ISColoringValue *allcolors,*colors; 2571 ISColoring ocoloring; 2572 2573 /* set coloring for diagonal portion */ 2574 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2575 2576 /* set coloring for off-diagonal portion */ 2577 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2578 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2579 for (i=0; i<a->B->n; i++) { 2580 colors[i] = allcolors[a->garray[i]]; 2581 } 2582 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2583 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2584 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2585 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2586 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2587 ISColoringValue *colors; 2588 int *larray; 2589 ISColoring ocoloring; 2590 2591 /* set coloring for diagonal portion */ 2592 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2593 for (i=0; i<a->A->n; i++) { 2594 larray[i] = i + a->cstart; 2595 } 2596 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2597 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2598 for 
(i=0; i<a->A->n; i++) { 2599 colors[i] = coloring->colors[larray[i]]; 2600 } 2601 ierr = PetscFree(larray);CHKERRQ(ierr); 2602 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2603 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2604 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2605 2606 /* set coloring for off-diagonal portion */ 2607 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2608 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2609 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2610 for (i=0; i<a->B->n; i++) { 2611 colors[i] = coloring->colors[larray[i]]; 2612 } 2613 ierr = PetscFree(larray);CHKERRQ(ierr); 2614 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2615 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2616 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2617 } else { 2618 SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype); 2619 } 2620 2621 PetscFunctionReturn(0); 2622 } 2623 2624 #undef __FUNCT__ 2625 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2626 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2627 { 2628 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2629 int ierr; 2630 2631 PetscFunctionBegin; 2632 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2633 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2634 PetscFunctionReturn(0); 2635 } 2636 2637 #undef __FUNCT__ 2638 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2639 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2640 { 2641 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2642 int ierr; 2643 2644 PetscFunctionBegin; 2645 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2646 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2647 PetscFunctionReturn(0); 2648 } 2649 2650 #undef __FUNCT__ 2651 #define __FUNCT__ "MatMerge" 2652 /*@C 2653 MatMerge - Creates a single large parallel PETSc matrix by concatenating the sequential 2654 matrices supplied by each processor 2655 2656 Collective on MPI_Comm 2657 2658 Input Parameters: 2659 + comm - the communicator the parallel matrix will live on 2660 - inmat - the input sequential matrix on each process 2661 2662 Output Parameter: 2663 . outmat - the parallel matrix generated 2664 2665 Level: advanced 2666 2667 Notes: The number of columns of the input matrix MUST be the same on EACH of 2668 the separate processes. The input matrix inmat is destroyed by this routine.
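   For illustration, a typical call sequence might look like the following sketch
   (Aseq is a hypothetical sequential matrix already assembled on each process;
   error checking is omitted):

.vb
      Mat Aseq,Apar;
      /* ... create and assemble the local sequential matrix Aseq ... */
      MatMerge(PETSC_COMM_WORLD,Aseq,&Apar);  /* Aseq is destroyed by the call */
.ve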
2669 2670 @*/ 2671 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat) 2672 { 2673 int ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz; 2674 PetscScalar *values; 2675 PetscMap columnmap,rowmap; 2676 2677 PetscFunctionBegin; 2678 2679 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr); 2680 2681 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2682 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2683 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr); 2684 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2685 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2686 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2687 2688 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2689 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2690 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2691 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2692 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2693 2694 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2695 for (i=0;i<m;i++) { 2696 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2697 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2698 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2699 } 2700 ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr); 2701 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2702 2703 for (i=0;i<m;i++) { 2704 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2705 I = i + rstart; 2706 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2707 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2708 } 2709 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2710 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2711 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2712 2713 PetscFunctionReturn(0); 2714 } 2715 2716 #undef __FUNCT__ 2717 #define __FUNCT__ "MatFileSplit" 2718 int MatFileSplit(Mat A,char *outfile) 2719 { 2720 int ierr,rank,len,m,N,i,rstart,*indx,nnz; 2721 PetscViewer out; 2722 char *name; 2723 Mat B; 2724 PetscScalar *values; 2725 2726 PetscFunctionBegin; 2727 2728 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2729 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2730 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr); 2731 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2732 for (i=0;i<m;i++) { 2733 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2734 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2735 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2736 } 2737 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2738 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2739 2740 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2741 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2742 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2743 sprintf(name,"%s.%d",outfile,rank); 2744 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr); 2745 ierr = PetscFree(name); 2746 ierr = MatView(B,out);CHKERRQ(ierr); 2747 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2748 ierr = MatDestroy(B);CHKERRQ(ierr); 2749 PetscFunctionReturn(0); 2750 } 2751
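/*
    Illustrative example (a sketch only, not part of the library source): assembling a
    parallel tridiagonal matrix using the AIJ routines defined above.  The global size 8
    and the values are hypothetical; error checking via CHKERRQ() is omitted.

      Mat         A;
      int         i,rstart,rend,col[3];
      PetscScalar v[3];

      MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,8,8,3,PETSC_NULL,1,PETSC_NULL,&A);
      MatGetOwnershipRange(A,&rstart,&rend);
      for (i=rstart; i<rend; i++) {
        v[0] = -1.0; v[1] = 2.0; v[2] = -1.0;
        col[0] = i-1; col[1] = i; col[2] = i+1;
        if (i == 0)      MatSetValues(A,1,&i,2,col+1,v+1,INSERT_VALUES);
        else if (i == 7) MatSetValues(A,1,&i,2,col,v,INSERT_VALUES);
        else             MatSetValues(A,1,&i,3,col,v,INSERT_VALUES);
      }
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
      MatDestroy(A);
*/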