/*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
EXTERN int DisAssemble_MPIAIJ(Mat);
EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatPrintHelp_SeqAIJ(Mat);

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each
  processor has an order N integer array) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
  rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
  rmax = aimax[row]; nrow = ailen[row]; \
  col1 = col - shift; \
 \
  low = 0; high = nrow; \
  while (high-low > 5) { \
    t = (low+high)/2; \
    if (rp[t] > col) high = t; \
    else             low = t; \
  } \
  for (_i=low; _i<high; _i++) { \
    if (rp[_i] > col1) break; \
    if (rp[_i] == col1) { \
      if (addv == ADD_VALUES) ap[_i] += value; \
      else                    ap[_i] = value; \
      goto a_noinsert; \
    } \
  } \
  if (nonew == 1) goto a_noinsert; \
  else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
  if (nrow >= rmax) { \
    /* there is no extra room in row, therefore enlarge */ \
    int         new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
    PetscScalar *new_a; \
 \
    if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
    /* malloc new storage space */ \
    len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
    ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
    new_j = (int*)(new_a + new_nz); \
    new_i = new_j + new_nz; \
 \
    /* copy over old data into new slots */ \
    for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
    for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
    ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
    len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
    ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                       len*sizeof(int));CHKERRQ(ierr); \
    ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
    ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                       len*sizeof(PetscScalar));CHKERRQ(ierr); \
    /* free up old matrix storage */ \
 \
    ierr = PetscFree(a->a);CHKERRQ(ierr); \
    if
(!a->singlemalloc) { \ 94 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 95 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 96 } \ 97 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 98 a->singlemalloc = PETSC_TRUE; \ 99 \ 100 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 101 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 102 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 103 a->maxnz += CHUNKSIZE; \ 104 a->reallocs++; \ 105 } \ 106 N = nrow++ - 1; a->nz++; \ 107 /* shift up all the later entries in this row */ \ 108 for (ii=N; ii>=_i; ii--) { \ 109 rp[ii+1] = rp[ii]; \ 110 ap[ii+1] = ap[ii]; \ 111 } \ 112 rp[_i] = col1; \ 113 ap[_i] = value; \ 114 a_noinsert: ; \ 115 ailen[row] = nrow; \ 116 } 117 118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 119 { \ 120 \ 121 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 122 rmax = bimax[row]; nrow = bilen[row]; \ 123 col1 = col - shift; \ 124 \ 125 low = 0; high = nrow; \ 126 while (high-low > 5) { \ 127 t = (low+high)/2; \ 128 if (rp[t] > col) high = t; \ 129 else low = t; \ 130 } \ 131 for (_i=low; _i<high; _i++) { \ 132 if (rp[_i] > col1) break; \ 133 if (rp[_i] == col1) { \ 134 if (addv == ADD_VALUES) ap[_i] += value; \ 135 else ap[_i] = value; \ 136 goto b_noinsert; \ 137 } \ 138 } \ 139 if (nonew == 1) goto b_noinsert; \ 140 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 141 if (nrow >= rmax) { \ 142 /* there is no extra room in row, therefore enlarge */ \ 143 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 144 PetscScalar *new_a; \ 145 \ 146 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 147 \ 148 /* malloc new storage space */ \ 149 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 150 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 151 new_j = (int*)(new_a + new_nz); \ 152 new_i = new_j + new_nz; \ 153 \ 154 /* copy over old data into new slots */ \ 155 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 156 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 157 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 158 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 159 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 160 len*sizeof(int));CHKERRQ(ierr); \ 161 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 163 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 164 /* free up old matrix storage */ \ 165 \ 166 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 167 if (!b->singlemalloc) { \ 168 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 169 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 170 } \ 171 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 172 b->singlemalloc = PETSC_TRUE; \ 173 \ 174 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 175 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 176 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 177 b->maxnz += CHUNKSIZE; \ 178 b->reallocs++; \ 179 } \ 180 N = nrow++ - 1; b->nz++; \ 181 /* shift up all the later entries in this row */ \ 182 for (ii=N; ii>=_i; ii--) { \ 183 rp[ii+1] = rp[ii]; \ 184 ap[ii+1] = ap[ii]; \ 185 } \ 186 rp[_i] = col1; \ 187 ap[_i] = value; \ 188 b_noinsert: ; \ 189 bilen[row] = nrow; \ 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatSetValues_MPIAIJ" 194 int MatSetValues_MPIAIJ(Mat 
mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 195 { 196 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 197 PetscScalar value; 198 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 199 int cstart = aij->cstart,cend = aij->cend,row,col; 200 PetscTruth roworiented = aij->roworiented; 201 202 /* Some Variables required in the macro */ 203 Mat A = aij->A; 204 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 205 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 206 PetscScalar *aa = a->a; 207 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 208 Mat B = aij->B; 209 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 210 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 211 PetscScalar *ba = b->a; 212 213 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 214 int nonew = a->nonew,shift = a->indexshift; 215 PetscScalar *ap; 216 217 PetscFunctionBegin; 218 for (i=0; i<m; i++) { 219 if (im[i] < 0) continue; 220 #if defined(PETSC_USE_BOPT_g) 221 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 222 #endif 223 if (im[i] >= rstart && im[i] < rend) { 224 row = im[i] - rstart; 225 for (j=0; j<n; j++) { 226 if (in[j] >= cstart && in[j] < cend){ 227 col = in[j] - cstart; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 if (ignorezeroentries && value == 0.0) continue; 230 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } else if (in[j] < 0) continue; 233 #if defined(PETSC_USE_BOPT_g) 234 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");} 235 #endif 236 else { 237 if (mat->was_assembled) { 238 if (!aij->colmap) { 239 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 240 } 241 #if defined (PETSC_USE_CTABLE) 242 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 243 col--; 244 #else 245 col = aij->colmap[in[j]] - 1; 246 #endif 247 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 248 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 249 col = in[j]; 250 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 251 B = aij->B; 252 b = (Mat_SeqAIJ*)B->data; 253 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 254 ba = b->a; 255 } 256 } else col = in[j]; 257 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 258 if (ignorezeroentries && value == 0.0) continue; 259 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 260 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 261 } 262 } 263 } else { 264 if (!aij->donotstash) { 265 if (roworiented) { 266 if (ignorezeroentries && v[i*n] == 0.0) continue; 267 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 268 } else { 269 if (ignorezeroentries && v[i] == 0.0) continue; 270 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 271 } 272 } 273 } 274 } 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatGetValues_MPIAIJ" 280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 281 { 282 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 283 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 284 int cstart = aij->cstart,cend = aij->cend,row,col; 285 286 PetscFunctionBegin; 287 for (i=0; i<m; i++) { 288 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 289 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 
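    /*
       As in MatSetValues_MPIAIJ() above, a row is processed locally only if it lies in
       [rstart,rend).  A requested column in [cstart,cend) is found in the diagonal block
       aij->A; any other column is translated to a local column of the off-diagonal block
       aij->B through colmap, the inverse of garray.  As a small illustration (values made
       up for this comment only): with garray = {3,7,12}, CreateColmap_MPIAIJ_Private()
       stores colmap[7] = 2, so global column 7 is local column 1 of aij->B.
    */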
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,nstash,reallocs;
  InsertMode addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)aij->A->data,*b = (Mat_SeqAIJ *)aij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;
#if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES)
  PetscTruth  flag;
#endif

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
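     From the user's side the cycle that drives this logic is just (an illustrative sketch,
     with declarations and error checking omitted; i, j and v stand for any global row,
     global column and value):

        ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);
        ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
        ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

     A later round of MatSetValues() that introduces a nonzero outside the existing
     off-diagonal column set forces the DisAssemble_MPIAIJ()/reassembly path handled below.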
*/ 386 /* 387 if nonzero structure of submatrix B cannot change then we know that 388 no processor disassembled thus we can skip this stuff 389 */ 390 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 391 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 392 if (mat->was_assembled && !other_disassembled) { 393 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 394 } 395 } 396 397 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 398 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 399 } 400 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 401 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 402 403 if (aij->rowvalues) { 404 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 405 aij->rowvalues = 0; 406 } 407 408 /* used by MatAXPY() */ 409 a->xtoy = 0; b->xtoy = 0; 410 a->XtoY = 0; b->XtoY = 0; 411 412 #if defined(PETSC_HAVE_SUPERLUDIST) 413 ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr); 414 if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); } 415 #endif 416 417 #if defined(PETSC_HAVE_SPOOLES) 418 ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr); 419 if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); } 420 #endif 421 PetscFunctionReturn(0); 422 } 423 424 #undef __FUNCT__ 425 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 426 int MatZeroEntries_MPIAIJ(Mat A) 427 { 428 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 429 int ierr; 430 431 PetscFunctionBegin; 432 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 433 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 434 PetscFunctionReturn(0); 435 } 436 437 #undef __FUNCT__ 438 #define __FUNCT__ "MatZeroRows_MPIAIJ" 439 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag) 440 { 441 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 442 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 443 int *nprocs,j,idx,nsends,row; 444 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 445 int *rvalues,tag = A->tag,count,base,slen,n,*source; 446 int *lens,imdex,*lrows,*values,rstart=l->rstart; 447 MPI_Comm comm = A->comm; 448 MPI_Request *send_waits,*recv_waits; 449 MPI_Status recv_status,*send_status; 450 IS istmp; 451 PetscTruth found; 452 453 PetscFunctionBegin; 454 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 455 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 456 457 /* first count number of contributors to each processor */ 458 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 459 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 460 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 461 for (i=0; i<N; i++) { 462 idx = rows[i]; 463 found = PETSC_FALSE; 464 for (j=0; j<size; j++) { 465 if (idx >= owners[j] && idx < owners[j+1]) { 466 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 467 } 468 } 469 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 470 } 471 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 472 473 /* inform other processors of number of messages and max length*/ 474 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 475 476 /* post receives: */ 477 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 478 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 479 for (i=0; i<nrecvs; i++) { 480 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 481 } 482 483 /* do sends: 484 1) starts[i] gives the starting index in svalues for 
stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry, which would
     allocate new memory many times and slow things down.
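     For example, a typical call from application code is (sketch only)

         ierr = MatZeroRows(A,rows,&one);CHKERRQ(ierr);

     where rows is an IS of global row numbers (they may be owned by other processes)
     and one is the scalar to place on the diagonal of each zeroed local row; passing a
     PETSC_NULL diagonal pointer simply zeros the rows.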
547 548 Contributed by: Mathew Knepley 549 */ 550 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 551 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 552 if (diag && (l->A->M == l->A->N)) { 553 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 554 } else if (diag) { 555 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 556 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 557 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 558 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 559 } 560 for (i = 0; i < slen; i++) { 561 row = lrows[i] + rstart; 562 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 563 } 564 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 565 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 566 } else { 567 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 568 } 569 ierr = ISDestroy(istmp);CHKERRQ(ierr); 570 ierr = PetscFree(lrows);CHKERRQ(ierr); 571 572 /* wait on sends */ 573 if (nsends) { 574 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 575 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 576 ierr = PetscFree(send_status);CHKERRQ(ierr); 577 } 578 ierr = PetscFree(send_waits);CHKERRQ(ierr); 579 ierr = PetscFree(svalues);CHKERRQ(ierr); 580 581 PetscFunctionReturn(0); 582 } 583 584 #undef __FUNCT__ 585 #define __FUNCT__ "MatMult_MPIAIJ" 586 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 587 { 588 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 589 int ierr,nt; 590 591 PetscFunctionBegin; 592 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 593 if (nt != A->n) { 594 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 595 } 596 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 597 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 598 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 599 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 600 PetscFunctionReturn(0); 601 } 602 603 #undef __FUNCT__ 604 #define __FUNCT__ "MatMultAdd_MPIAIJ" 605 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 606 { 607 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 608 int ierr; 609 610 PetscFunctionBegin; 611 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 612 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 613 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 614 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 615 PetscFunctionReturn(0); 616 } 617 618 #undef __FUNCT__ 619 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 620 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 621 { 622 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 623 int ierr; 624 625 PetscFunctionBegin; 626 /* do nondiagonal part */ 627 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 628 /* send it on its way */ 629 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 630 /* do local part */ 631 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 632 /* receive remote parts: note this assumes the values are not actually */ 633 /* inserted in yy until the next line, which is true for my implementation*/ 634 /* but is not perhaps always true. 
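   In matrix terms the result assembled here is yy = (a->A)^T xx plus the scatter-added
   contributions (a->B)^T xx from each process, i.e. the action of the full parallel transpose.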
  */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsSymmetric_MPIAIJ"
int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f)
{
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat        A = Aij->A, B,Aoff = Aij->B,Boff;
  MatType    type;
  IS         Me,Notme;
  int        M,N,first,last,*notme,i,ierr;

  PetscFunctionBegin;
  ierr = MatGetType(Bmat,&type); CHKERRQ(ierr);
  ierr = PetscStrcmp(type,MATMPIAIJ,f); CHKERRQ(ierr);
  if (!*f) SETERRQ(1,"Second matrix needs to be MPIAIJ too");
  ierr = MatIsSymmetric(A,B,f); CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  Bij = (Mat_MPIAIJ *) Bmat->data; B = Bij->B;
  ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(int),&notme); CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr);
  ierr = MatGetSubMatrix(B,Notme,Me,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Boff); CHKERRQ(ierr);
  ierr = MatIsSymmetric(Aoff,Boff,f); CHKERRQ(ierr);
  ierr = MatDestroy(Boff); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation*/
  /* but is not perhaps always true.
*/ 691 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 692 PetscFunctionReturn(0); 693 } 694 695 /* 696 This only works correctly for square matrices where the subblock A->A is the 697 diagonal block 698 */ 699 #undef __FUNCT__ 700 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 701 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 702 { 703 int ierr; 704 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 705 706 PetscFunctionBegin; 707 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 708 if (a->rstart != a->cstart || a->rend != a->cend) { 709 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 710 } 711 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 712 PetscFunctionReturn(0); 713 } 714 715 #undef __FUNCT__ 716 #define __FUNCT__ "MatScale_MPIAIJ" 717 int MatScale_MPIAIJ(PetscScalar *aa,Mat A) 718 { 719 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 720 int ierr; 721 722 PetscFunctionBegin; 723 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 724 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 725 PetscFunctionReturn(0); 726 } 727 728 #undef __FUNCT__ 729 #define __FUNCT__ "MatDestroy_MPIAIJ" 730 int MatDestroy_MPIAIJ(Mat mat) 731 { 732 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 733 int ierr; 734 735 PetscFunctionBegin; 736 #if defined(PETSC_USE_LOG) 737 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 738 #endif 739 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 740 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 741 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 742 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 743 #if defined (PETSC_USE_CTABLE) 744 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 745 #else 746 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 747 #endif 748 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 749 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 750 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 751 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 752 ierr = PetscFree(aij);CHKERRQ(ierr); 753 PetscFunctionReturn(0); 754 } 755 756 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer); 757 extern int MatFactorInfo_Spooles(Mat,PetscViewer); 758 759 #undef __FUNCT__ 760 #define __FUNCT__ "MatView_MPIAIJ_Binary" 761 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 762 { 763 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 764 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 765 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 766 int nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag; 767 int nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 768 PetscScalar *column_values; 769 770 PetscFunctionBegin; 771 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 772 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 773 nz = A->nz + B->nz; 774 if (rank == 0) { 775 header[0] = MAT_FILE_COOKIE; 776 header[1] = mat->M; 777 header[2] = mat->N; 778 ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 779 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 780 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr); 781 /* get largest number of rows any processor has */ 782 rlen = mat->m; 783 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 784 for (i=1; i<size; i++) { 785 rlen = PetscMax(rlen,range[i+1] - range[i]); 786 } 787 } else { 788 ierr = 
MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr =
MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 877 } 878 ierr = PetscFree(column_values);CHKERRQ(ierr); 879 PetscFunctionReturn(0); 880 } 881 882 #undef __FUNCT__ 883 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 884 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 885 { 886 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 887 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 888 int ierr,shift = C->indexshift,rank = aij->rank,size = aij->size; 889 PetscTruth isdraw,isascii,flg,isbinary; 890 PetscViewer sviewer; 891 PetscViewerFormat format; 892 893 PetscFunctionBegin; 894 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 895 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 896 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 897 if (isascii) { 898 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 899 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 900 MatInfo info; 901 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 902 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 903 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 904 if (flg) { 905 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 906 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 907 } else { 908 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 909 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 910 } 911 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 912 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 913 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 914 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 915 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 916 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 917 PetscFunctionReturn(0); 918 } else if (format == PETSC_VIEWER_ASCII_INFO) { 919 PetscFunctionReturn(0); 920 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 921 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE) 922 ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr); 923 #endif 924 #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE) 925 ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr); 926 #endif 927 928 PetscFunctionReturn(0); 929 } 930 } else if (isbinary) { 931 if (size == 1) { 932 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 933 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 934 } else { 935 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 936 } 937 PetscFunctionReturn(0); 938 } else if (isdraw) { 939 PetscDraw draw; 940 PetscTruth isnull; 941 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 942 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 943 } 944 945 if (size == 1) { 946 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 947 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 948 } else { 949 /* assemble the entire matrix onto first processor. 
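        This is done by creating a second MPIAIJ matrix whose rows all live on process 0 and
        copying the local A and B parts into it with MatSetValues(); for a large matrix this
        is memory intensive on process 0 and is only intended for viewing.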
*/ 950 Mat A; 951 Mat_SeqAIJ *Aloc; 952 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 953 PetscScalar *a; 954 955 if (!rank) { 956 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 957 } else { 958 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 959 } 960 PetscLogObjectParent(mat,A); 961 962 /* copy over the A part */ 963 Aloc = (Mat_SeqAIJ*)aij->A->data; 964 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 965 row = aij->rstart; 966 for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;} 967 for (i=0; i<m; i++) { 968 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 969 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 970 } 971 aj = Aloc->j; 972 for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;} 973 974 /* copy over the B part */ 975 Aloc = (Mat_SeqAIJ*)aij->B->data; 976 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 977 row = aij->rstart; 978 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 979 ct = cols; 980 for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];} 981 for (i=0; i<m; i++) { 982 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 983 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 984 } 985 ierr = PetscFree(ct);CHKERRQ(ierr); 986 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 987 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 988 /* 989 Everyone has to call to draw the matrix since the graphics waits are 990 synchronized across all processors that share the PetscDraw object 991 */ 992 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 993 if (!rank) { 994 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 995 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 996 } 997 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 998 ierr = MatDestroy(A);CHKERRQ(ierr); 999 } 1000 PetscFunctionReturn(0); 1001 } 1002 1003 #undef __FUNCT__ 1004 #define __FUNCT__ "MatView_MPIAIJ" 1005 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1006 { 1007 int ierr; 1008 PetscTruth isascii,isdraw,issocket,isbinary; 1009 1010 PetscFunctionBegin; 1011 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 1012 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1013 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1014 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1015 if (isascii || isdraw || isbinary || issocket) { 1016 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1017 } else { 1018 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1019 } 1020 PetscFunctionReturn(0); 1021 } 1022 1023 1024 1025 #undef __FUNCT__ 1026 #define __FUNCT__ "MatRelax_MPIAIJ" 1027 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1028 { 1029 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1030 int ierr; 1031 Vec bb1; 1032 PetscScalar mone=-1.0; 1033 1034 PetscFunctionBegin; 1035 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1036 1037 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1038 1039 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1040 if 
(flag & SOR_ZERO_INITIAL_GUESS) { 1041 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1042 its--; 1043 } 1044 1045 while (its--) { 1046 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1047 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1048 1049 /* update rhs: bb1 = bb - B*x */ 1050 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1051 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1052 1053 /* local sweep */ 1054 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1055 CHKERRQ(ierr); 1056 } 1057 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1058 if (flag & SOR_ZERO_INITIAL_GUESS) { 1059 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1060 its--; 1061 } 1062 while (its--) { 1063 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1064 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1065 1066 /* update rhs: bb1 = bb - B*x */ 1067 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1068 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1069 1070 /* local sweep */ 1071 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1072 CHKERRQ(ierr); 1073 } 1074 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1075 if (flag & SOR_ZERO_INITIAL_GUESS) { 1076 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1077 its--; 1078 } 1079 while (its--) { 1080 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1081 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1082 1083 /* update rhs: bb1 = bb - B*x */ 1084 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1085 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1086 1087 /* local sweep */ 1088 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1089 CHKERRQ(ierr); 1090 } 1091 } else { 1092 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1093 } 1094 1095 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #undef __FUNCT__ 1100 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1101 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1102 { 1103 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1104 Mat A = mat->A,B = mat->B; 1105 int ierr; 1106 PetscReal isend[5],irecv[5]; 1107 1108 PetscFunctionBegin; 1109 info->block_size = 1.0; 1110 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1111 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1112 isend[3] = info->memory; isend[4] = info->mallocs; 1113 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1114 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1115 isend[3] += info->memory; isend[4] += info->mallocs; 1116 if (flag == MAT_LOCAL) { 1117 info->nz_used = isend[0]; 1118 info->nz_allocated = isend[1]; 1119 info->nz_unneeded = isend[2]; 1120 info->memory = isend[3]; 1121 info->mallocs = isend[4]; 1122 } else if (flag == MAT_GLOBAL_MAX) { 1123 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1124 info->nz_used = irecv[0]; 1125 info->nz_allocated = irecv[1]; 1126 info->nz_unneeded = irecv[2]; 1127 info->memory = irecv[3]; 1128 
info->mallocs = irecv[4]; 1129 } else if (flag == MAT_GLOBAL_SUM) { 1130 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1131 info->nz_used = irecv[0]; 1132 info->nz_allocated = irecv[1]; 1133 info->nz_unneeded = irecv[2]; 1134 info->memory = irecv[3]; 1135 info->mallocs = irecv[4]; 1136 } 1137 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1138 info->fill_ratio_needed = 0; 1139 info->factor_mallocs = 0; 1140 info->rows_global = (double)matin->M; 1141 info->columns_global = (double)matin->N; 1142 info->rows_local = (double)matin->m; 1143 info->columns_local = (double)matin->N; 1144 1145 PetscFunctionReturn(0); 1146 } 1147 1148 #undef __FUNCT__ 1149 #define __FUNCT__ "MatSetOption_MPIAIJ" 1150 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1151 { 1152 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1153 int ierr; 1154 1155 PetscFunctionBegin; 1156 switch (op) { 1157 case MAT_NO_NEW_NONZERO_LOCATIONS: 1158 case MAT_YES_NEW_NONZERO_LOCATIONS: 1159 case MAT_COLUMNS_UNSORTED: 1160 case MAT_COLUMNS_SORTED: 1161 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1162 case MAT_KEEP_ZEROED_ROWS: 1163 case MAT_NEW_NONZERO_LOCATION_ERR: 1164 case MAT_USE_INODES: 1165 case MAT_DO_NOT_USE_INODES: 1166 case MAT_IGNORE_ZERO_ENTRIES: 1167 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1168 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1169 break; 1170 case MAT_ROW_ORIENTED: 1171 a->roworiented = PETSC_TRUE; 1172 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1173 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1174 break; 1175 case MAT_ROWS_SORTED: 1176 case MAT_ROWS_UNSORTED: 1177 case MAT_YES_NEW_DIAGONALS: 1178 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1179 break; 1180 case MAT_COLUMN_ORIENTED: 1181 a->roworiented = PETSC_FALSE; 1182 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1183 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1184 break; 1185 case MAT_IGNORE_OFF_PROC_ENTRIES: 1186 a->donotstash = PETSC_TRUE; 1187 break; 1188 case MAT_NO_NEW_DIAGONALS: 1189 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1190 default: 1191 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1192 } 1193 PetscFunctionReturn(0); 1194 } 1195 1196 #undef __FUNCT__ 1197 #define __FUNCT__ "MatGetRow_MPIAIJ" 1198 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1199 { 1200 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1201 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1202 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1203 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1204 int *cmap,*idx_p; 1205 1206 PetscFunctionBegin; 1207 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1208 mat->getrowactive = PETSC_TRUE; 1209 1210 if (!mat->rowvalues && (idx || v)) { 1211 /* 1212 allocate enough space to hold information from the longest row. 
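       A single block of max*(sizeof(int)+sizeof(PetscScalar)) bytes is allocated: the values
       occupy the front and mat->rowindices is pointed at the tail of the same buffer, so only
       one allocation is needed and it is reused by subsequent MatGetRow() calls (it is
       released again at assembly or destroy time).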
1213 */ 1214 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1215 int max = 1,tmp; 1216 for (i=0; i<matin->m; i++) { 1217 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1218 if (max < tmp) { max = tmp; } 1219 } 1220 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1221 mat->rowindices = (int*)(mat->rowvalues + max); 1222 } 1223 1224 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1225 lrow = row - rstart; 1226 1227 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1228 if (!v) {pvA = 0; pvB = 0;} 1229 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1230 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1231 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1232 nztot = nzA + nzB; 1233 1234 cmap = mat->garray; 1235 if (v || idx) { 1236 if (nztot) { 1237 /* Sort by increasing column numbers, assuming A and B already sorted */ 1238 int imark = -1; 1239 if (v) { 1240 *v = v_p = mat->rowvalues; 1241 for (i=0; i<nzB; i++) { 1242 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1243 else break; 1244 } 1245 imark = i; 1246 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1247 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1248 } 1249 if (idx) { 1250 *idx = idx_p = mat->rowindices; 1251 if (imark > -1) { 1252 for (i=0; i<imark; i++) { 1253 idx_p[i] = cmap[cworkB[i]]; 1254 } 1255 } else { 1256 for (i=0; i<nzB; i++) { 1257 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1258 else break; 1259 } 1260 imark = i; 1261 } 1262 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1263 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1264 } 1265 } else { 1266 if (idx) *idx = 0; 1267 if (v) *v = 0; 1268 } 1269 } 1270 *nz = nztot; 1271 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1272 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1273 PetscFunctionReturn(0); 1274 } 1275 1276 #undef __FUNCT__ 1277 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1278 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1279 { 1280 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1281 1282 PetscFunctionBegin; 1283 if (aij->getrowactive == PETSC_FALSE) { 1284 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1285 } 1286 aij->getrowactive = PETSC_FALSE; 1287 PetscFunctionReturn(0); 1288 } 1289 1290 #undef __FUNCT__ 1291 #define __FUNCT__ "MatNorm_MPIAIJ" 1292 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1293 { 1294 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1295 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1296 int ierr,i,j,cstart = aij->cstart,shift = amat->indexshift; 1297 PetscReal sum = 0.0; 1298 PetscScalar *v; 1299 1300 PetscFunctionBegin; 1301 if (aij->size == 1) { 1302 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1303 } else { 1304 if (type == NORM_FROBENIUS) { 1305 v = amat->a; 1306 for (i=0; i<amat->nz; i++) { 1307 #if defined(PETSC_USE_COMPLEX) 1308 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1309 #else 1310 sum += (*v)*(*v); v++; 1311 #endif 1312 } 1313 v = bmat->a; 1314 for (i=0; i<bmat->nz; i++) { 1315 #if defined(PETSC_USE_COMPLEX) 1316 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1317 #else 1318 sum += (*v)*(*v); v++; 1319 #endif 1320 } 1321 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1322 *norm = sqrt(*norm); 1323 } else if (type == NORM_1) { /* max column norm */ 1324 
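      /*
         The 1-norm is the maximum over columns j of sum_i |a_ij|.  Each process adds the
         absolute values of its diagonal block (columns offset by cstart) and of its
         off-diagonal block (columns mapped back to global numbering through garray) into a
         length-N array; the arrays are combined with MPI_Allreduce(...,MPI_SUM,...) and the
         largest entry of the result is the norm.
      */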
PetscReal *tmp,*tmp2; 1325 int *jj,*garray = aij->garray; 1326 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1327 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1328 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1329 *norm = 0.0; 1330 v = amat->a; jj = amat->j; 1331 for (j=0; j<amat->nz; j++) { 1332 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1333 } 1334 v = bmat->a; jj = bmat->j; 1335 for (j=0; j<bmat->nz; j++) { 1336 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1337 } 1338 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1339 for (j=0; j<mat->N; j++) { 1340 if (tmp2[j] > *norm) *norm = tmp2[j]; 1341 } 1342 ierr = PetscFree(tmp);CHKERRQ(ierr); 1343 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1344 } else if (type == NORM_INFINITY) { /* max row norm */ 1345 PetscReal ntemp = 0.0; 1346 for (j=0; j<aij->A->m; j++) { 1347 v = amat->a + amat->i[j] + shift; 1348 sum = 0.0; 1349 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1350 sum += PetscAbsScalar(*v); v++; 1351 } 1352 v = bmat->a + bmat->i[j] + shift; 1353 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1354 sum += PetscAbsScalar(*v); v++; 1355 } 1356 if (sum > ntemp) ntemp = sum; 1357 } 1358 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1359 } else { 1360 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1361 } 1362 } 1363 PetscFunctionReturn(0); 1364 } 1365 1366 #undef __FUNCT__ 1367 #define __FUNCT__ "MatTranspose_MPIAIJ" 1368 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1369 { 1370 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1371 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1372 int ierr,shift = Aloc->indexshift; 1373 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1374 Mat B; 1375 PetscScalar *array; 1376 1377 PetscFunctionBegin; 1378 if (!matout && M != N) { 1379 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1380 } 1381 1382 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1383 1384 /* copy over the A part */ 1385 Aloc = (Mat_SeqAIJ*)a->A->data; 1386 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1387 row = a->rstart; 1388 for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;} 1389 for (i=0; i<m; i++) { 1390 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1391 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1392 } 1393 aj = Aloc->j; 1394 for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;} 1395 1396 /* copy over the B part */ 1397 Aloc = (Mat_SeqAIJ*)a->B->data; 1398 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1399 row = a->rstart; 1400 ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr); 1401 ct = cols; 1402 for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];} 1403 for (i=0; i<m; i++) { 1404 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1405 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1406 } 1407 ierr = PetscFree(ct);CHKERRQ(ierr); 1408 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1409 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1410 if (matout) { 1411 *matout = B; 1412 } else { 1413 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1414 } 1415 PetscFunctionReturn(0); 1416 } 1417 1418 #undef __FUNCT__ 1419 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1420 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1421 { 1422 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1423 Mat a = aij->A,b 
= aij->B; 1424 int ierr,s1,s2,s3; 1425 1426 PetscFunctionBegin; 1427 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1428 if (rr) { 1429 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1430 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1431 /* Overlap communication with computation. */ 1432 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1433 } 1434 if (ll) { 1435 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1436 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1437 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1438 } 1439 /* scale the diagonal block */ 1440 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1441 1442 if (rr) { 1443 /* Do a scatter end and then right scale the off-diagonal block */ 1444 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1445 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1446 } 1447 1448 PetscFunctionReturn(0); 1449 } 1450 1451 1452 #undef __FUNCT__ 1453 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1454 int MatPrintHelp_MPIAIJ(Mat A) 1455 { 1456 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1457 int ierr; 1458 1459 PetscFunctionBegin; 1460 if (!a->rank) { 1461 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1462 } 1463 PetscFunctionReturn(0); 1464 } 1465 1466 #undef __FUNCT__ 1467 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1468 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1469 { 1470 PetscFunctionBegin; 1471 *bs = 1; 1472 PetscFunctionReturn(0); 1473 } 1474 #undef __FUNCT__ 1475 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1476 int MatSetUnfactored_MPIAIJ(Mat A) 1477 { 1478 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1479 int ierr; 1480 1481 PetscFunctionBegin; 1482 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1483 PetscFunctionReturn(0); 1484 } 1485 1486 #undef __FUNCT__ 1487 #define __FUNCT__ "MatEqual_MPIAIJ" 1488 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1489 { 1490 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1491 Mat a,b,c,d; 1492 PetscTruth flg; 1493 int ierr; 1494 1495 PetscFunctionBegin; 1496 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1497 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1498 a = matA->A; b = matA->B; 1499 c = matB->A; d = matB->B; 1500 1501 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1502 if (flg == PETSC_TRUE) { 1503 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1504 } 1505 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1506 PetscFunctionReturn(0); 1507 } 1508 1509 #undef __FUNCT__ 1510 #define __FUNCT__ "MatCopy_MPIAIJ" 1511 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1512 { 1513 int ierr; 1514 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1515 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1516 PetscTruth flg; 1517 1518 PetscFunctionBegin; 1519 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1520 if (str != SAME_NONZERO_PATTERN || !flg) { 1521 /* because of the column compression in the off-processor part of the matrix a->B, 1522 the number of columns in a->B and b->B may be different, hence we cannot call 1523 the MatCopy() directly on the two parts. 
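       (For example, two matrices of identical parallel sizes may couple to different sets of
       off-process columns, so a->B on one may have fewer local columns than b->B on the other.)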
If need be, we can provide a more 1524 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1525 then copying the submatrices */ 1526 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1527 } else { 1528 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1529 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1530 } 1531 PetscFunctionReturn(0); 1532 } 1533 1534 #undef __FUNCT__ 1535 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1536 int MatSetUpPreallocation_MPIAIJ(Mat A) 1537 { 1538 int ierr; 1539 1540 PetscFunctionBegin; 1541 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1542 PetscFunctionReturn(0); 1543 } 1544 1545 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1546 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1547 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1548 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **); 1549 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1550 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1551 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*); 1552 #endif 1553 1554 #include "petscblaslapack.h" 1555 extern int MatAXPY_SeqAIJ(PetscScalar *,Mat,Mat,MatStructure); 1556 #undef __FUNCT__ 1557 #define __FUNCT__ "MatAXPY_MPIAIJ" 1558 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1559 { 1560 int ierr,one=1,i; 1561 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1562 Mat_SeqAIJ *x,*y; 1563 1564 PetscFunctionBegin; 1565 if (str == SAME_NONZERO_PATTERN) { 1566 x = (Mat_SeqAIJ *)xx->A->data; 1567 y = (Mat_SeqAIJ *)yy->A->data; 1568 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1569 x = (Mat_SeqAIJ *)xx->B->data; 1570 y = (Mat_SeqAIJ *)yy->B->data; 1571 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1572 } else if (str == SUBSET_NONZERO_PATTERN) { 1573 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1574 1575 x = (Mat_SeqAIJ *)xx->B->data; 1576 y = (Mat_SeqAIJ *)yy->B->data; 1577 if (y->xtoy && y->XtoY != xx->B) { 1578 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1579 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1580 } 1581 if (!y->xtoy) { /* get xtoy */ 1582 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1583 y->XtoY = xx->B; 1584 } 1585 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1586 } else { 1587 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1588 } 1589 PetscFunctionReturn(0); 1590 } 1591 1592 /* -------------------------------------------------------------------*/ 1593 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1594 MatGetRow_MPIAIJ, 1595 MatRestoreRow_MPIAIJ, 1596 MatMult_MPIAIJ, 1597 MatMultAdd_MPIAIJ, 1598 MatMultTranspose_MPIAIJ, 1599 MatMultTransposeAdd_MPIAIJ, 1600 0, 1601 0, 1602 0, 1603 0, 1604 0, 1605 0, 1606 MatRelax_MPIAIJ, 1607 MatTranspose_MPIAIJ, 1608 MatGetInfo_MPIAIJ, 1609 MatEqual_MPIAIJ, 1610 MatGetDiagonal_MPIAIJ, 1611 MatDiagonalScale_MPIAIJ, 1612 MatNorm_MPIAIJ, 1613 MatAssemblyBegin_MPIAIJ, 1614 MatAssemblyEnd_MPIAIJ, 1615 0, 1616 MatSetOption_MPIAIJ, 1617 MatZeroEntries_MPIAIJ, 1618 MatZeroRows_MPIAIJ, 1619 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1620 MatLUFactorSymbolic_MPIAIJ_TFS, 1621 #else 1622 0, 1623 #endif 1624 0, 1625 0, 1626 0, 1627 MatSetUpPreallocation_MPIAIJ, 1628 0, 1629 0, 1630 0, 1631 0, 1632 MatDuplicate_MPIAIJ, 1633 0, 1634 0, 1635 0, 1636 0, 1637 MatAXPY_MPIAIJ, 1638 MatGetSubMatrices_MPIAIJ, 1639 
MatIncreaseOverlap_MPIAIJ, 1640 MatGetValues_MPIAIJ, 1641 MatCopy_MPIAIJ, 1642 MatPrintHelp_MPIAIJ, 1643 MatScale_MPIAIJ, 1644 0, 1645 0, 1646 0, 1647 MatGetBlockSize_MPIAIJ, 1648 0, 1649 0, 1650 0, 1651 0, 1652 MatFDColoringCreate_MPIAIJ, 1653 0, 1654 MatSetUnfactored_MPIAIJ, 1655 0, 1656 0, 1657 MatGetSubMatrix_MPIAIJ, 1658 MatDestroy_MPIAIJ, 1659 MatView_MPIAIJ, 1660 MatGetPetscMaps_Petsc, 1661 0, 1662 0, 1663 0, 1664 0, 1665 0, 1666 0, 1667 0, 1668 0, 1669 MatSetColoring_MPIAIJ, 1670 MatSetValuesAdic_MPIAIJ, 1671 MatSetValuesAdifor_MPIAIJ 1672 }; 1673 1674 /* ----------------------------------------------------------------------------------------*/ 1675 1676 EXTERN_C_BEGIN 1677 #undef __FUNCT__ 1678 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1679 int MatStoreValues_MPIAIJ(Mat mat) 1680 { 1681 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1682 int ierr; 1683 1684 PetscFunctionBegin; 1685 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1686 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1687 PetscFunctionReturn(0); 1688 } 1689 EXTERN_C_END 1690 1691 EXTERN_C_BEGIN 1692 #undef __FUNCT__ 1693 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1694 int MatRetrieveValues_MPIAIJ(Mat mat) 1695 { 1696 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1697 int ierr; 1698 1699 PetscFunctionBegin; 1700 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1701 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1702 PetscFunctionReturn(0); 1703 } 1704 EXTERN_C_END 1705 1706 #include "petscpc.h" 1707 EXTERN_C_BEGIN 1708 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1709 EXTERN_C_END 1710 1711 EXTERN_C_BEGIN 1712 #undef __FUNCT__ 1713 #define __FUNCT__ "MatCreate_MPIAIJ" 1714 int MatCreate_MPIAIJ(Mat B) 1715 { 1716 Mat_MPIAIJ *b; 1717 int ierr,i,size; 1718 1719 PetscFunctionBegin; 1720 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1721 1722 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1723 B->data = (void*)b; 1724 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1725 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1726 B->factor = 0; 1727 B->assembled = PETSC_FALSE; 1728 B->mapping = 0; 1729 1730 B->insertmode = NOT_SET_VALUES; 1731 b->size = size; 1732 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1733 1734 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1735 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1736 1737 /* the information in the maps duplicates the information computed below, eventually 1738 we should remove the duplicate information that is not contained in the maps */ 1739 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1740 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1741 1742 /* build local table of row and column ownerships */ 1743 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1744 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1745 b->cowners = b->rowners + b->size + 2; 1746 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1747 b->rowners[0] = 0; 1748 for (i=2; i<=b->size; i++) { 1749 b->rowners[i] += b->rowners[i-1]; 1750 } 1751 b->rstart = b->rowners[b->rank]; 1752 b->rend = b->rowners[b->rank+1]; 1753 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1754 b->cowners[0] = 0; 1755 for (i=2; i<=b->size; i++) { 1756 b->cowners[i] += b->cowners[i-1]; 1757 } 1758 b->cstart = b->cowners[b->rank]; 1759 b->cend = 
b->cowners[b->rank+1]; 1760 1761 /* build cache for off array entries formed */ 1762 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1763 b->donotstash = PETSC_FALSE; 1764 b->colmap = 0; 1765 b->garray = 0; 1766 b->roworiented = PETSC_TRUE; 1767 1768 /* stuff used for matrix vector multiply */ 1769 b->lvec = PETSC_NULL; 1770 b->Mvctx = PETSC_NULL; 1771 1772 /* stuff for MatGetRow() */ 1773 b->rowindices = 0; 1774 b->rowvalues = 0; 1775 b->getrowactive = PETSC_FALSE; 1776 1777 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1778 "MatStoreValues_MPIAIJ", 1779 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1780 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1781 "MatRetrieveValues_MPIAIJ", 1782 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1783 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1784 "MatGetDiagonalBlock_MPIAIJ", 1785 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1786 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C", 1787 "MatIsSymmetric_MPIAIJ", 1788 MatIsSymmetric_MPIAIJ); CHKERRQ(ierr); 1789 PetscFunctionReturn(0); 1790 } 1791 EXTERN_C_END 1792 1793 #undef __FUNCT__ 1794 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1795 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1796 { 1797 Mat mat; 1798 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1799 int ierr; 1800 1801 PetscFunctionBegin; 1802 *newmat = 0; 1803 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1804 ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1805 a = (Mat_MPIAIJ*)mat->data; 1806 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1807 mat->factor = matin->factor; 1808 mat->assembled = PETSC_TRUE; 1809 mat->insertmode = NOT_SET_VALUES; 1810 mat->preallocated = PETSC_TRUE; 1811 1812 a->rstart = oldmat->rstart; 1813 a->rend = oldmat->rend; 1814 a->cstart = oldmat->cstart; 1815 a->cend = oldmat->cend; 1816 a->size = oldmat->size; 1817 a->rank = oldmat->rank; 1818 a->donotstash = oldmat->donotstash; 1819 a->roworiented = oldmat->roworiented; 1820 a->rowindices = 0; 1821 a->rowvalues = 0; 1822 a->getrowactive = PETSC_FALSE; 1823 1824 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1825 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1826 if (oldmat->colmap) { 1827 #if defined (PETSC_USE_CTABLE) 1828 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1829 #else 1830 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1831 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1832 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1833 #endif 1834 } else a->colmap = 0; 1835 if (oldmat->garray) { 1836 int len; 1837 len = oldmat->B->n; 1838 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1839 PetscLogObjectMemory(mat,len*sizeof(int)); 1840 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1841 } else a->garray = 0; 1842 1843 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1844 PetscLogObjectParent(mat,a->lvec); 1845 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1846 PetscLogObjectParent(mat,a->Mvctx); 1847 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1848 PetscLogObjectParent(mat,a->A); 1849 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1850 PetscLogObjectParent(mat,a->B); 1851 ierr 
= PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1852 *newmat = mat; 1853 PetscFunctionReturn(0); 1854 } 1855 1856 #include "petscsys.h" 1857 1858 EXTERN_C_BEGIN 1859 #undef __FUNCT__ 1860 #define __FUNCT__ "MatLoad_MPIAIJ" 1861 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat) 1862 { 1863 Mat A; 1864 PetscScalar *vals,*svals; 1865 MPI_Comm comm = ((PetscObject)viewer)->comm; 1866 MPI_Status status; 1867 int i,nz,ierr,j,rstart,rend,fd; 1868 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1869 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1870 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1871 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_SUPERLUDIST) 1872 PetscTruth flag; 1873 #endif 1874 1875 PetscFunctionBegin; 1876 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1877 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1878 if (!rank) { 1879 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1880 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1881 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1882 if (header[3] < 0) { 1883 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1884 } 1885 } 1886 1887 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1888 M = header[1]; N = header[2]; 1889 /* determine ownership of all rows */ 1890 m = M/size + ((M % size) > rank); 1891 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1892 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1893 rowners[0] = 0; 1894 for (i=2; i<=size; i++) { 1895 rowners[i] += rowners[i-1]; 1896 } 1897 rstart = rowners[rank]; 1898 rend = rowners[rank+1]; 1899 1900 /* distribute row lengths to all processors */ 1901 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1902 offlens = ourlens + (rend-rstart); 1903 if (!rank) { 1904 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1905 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1906 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1907 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1908 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1909 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1910 } else { 1911 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1912 } 1913 1914 if (!rank) { 1915 /* calculate the number of nonzeros on each processor */ 1916 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1917 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1918 for (i=0; i<size; i++) { 1919 for (j=rowners[i]; j< rowners[i+1]; j++) { 1920 procsnz[i] += rowlengths[j]; 1921 } 1922 } 1923 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1924 1925 /* determine max buffer needed and allocate it */ 1926 maxnz = 0; 1927 for (i=0; i<size; i++) { 1928 maxnz = PetscMax(maxnz,procsnz[i]); 1929 } 1930 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1931 1932 /* read in my part of the matrix column indices */ 1933 nz = procsnz[0]; 1934 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1935 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1936 1937 /* read in every one elses and ship off */ 1938 for (i=1; i<size; i++) { 1939 nz = procsnz[i]; 1940 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1941 ierr = 
MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1942 } 1943 ierr = PetscFree(cols);CHKERRQ(ierr); 1944 } else { 1945 /* determine buffer space needed for message */ 1946 nz = 0; 1947 for (i=0; i<m; i++) { 1948 nz += ourlens[i]; 1949 } 1950 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 1951 1952 /* receive message of column indices*/ 1953 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1954 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1955 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1956 } 1957 1958 /* determine column ownership if matrix is not square */ 1959 if (N != M) { 1960 n = N/size + ((N % size) > rank); 1961 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1962 cstart = cend - n; 1963 } else { 1964 cstart = rstart; 1965 cend = rend; 1966 n = cend - cstart; 1967 } 1968 1969 /* loop over local rows, determining number of off diagonal entries */ 1970 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 1971 jj = 0; 1972 for (i=0; i<m; i++) { 1973 for (j=0; j<ourlens[i]; j++) { 1974 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1975 jj++; 1976 } 1977 } 1978 1979 /* create our matrix */ 1980 for (i=0; i<m; i++) { 1981 ourlens[i] -= offlens[i]; 1982 } 1983 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1984 A = *newmat; 1985 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1986 for (i=0; i<m; i++) { 1987 ourlens[i] += offlens[i]; 1988 } 1989 1990 if (!rank) { 1991 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1992 1993 /* read in my part of the matrix numerical values */ 1994 nz = procsnz[0]; 1995 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1996 1997 /* insert into matrix */ 1998 jj = rstart; 1999 smycols = mycols; 2000 svals = vals; 2001 for (i=0; i<m; i++) { 2002 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2003 smycols += ourlens[i]; 2004 svals += ourlens[i]; 2005 jj++; 2006 } 2007 2008 /* read in other processors and ship out */ 2009 for (i=1; i<size; i++) { 2010 nz = procsnz[i]; 2011 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2012 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2013 } 2014 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2015 } else { 2016 /* receive numeric values */ 2017 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2018 2019 /* receive message of values*/ 2020 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2021 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2022 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2023 2024 /* insert into matrix */ 2025 jj = rstart; 2026 smycols = mycols; 2027 svals = vals; 2028 for (i=0; i<m; i++) { 2029 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2030 smycols += ourlens[i]; 2031 svals += ourlens[i]; 2032 jj++; 2033 } 2034 } 2035 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2036 ierr = PetscFree(vals);CHKERRQ(ierr); 2037 ierr = PetscFree(mycols);CHKERRQ(ierr); 2038 ierr = PetscFree(rowners);CHKERRQ(ierr); 2039 2040 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2041 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2042 #if defined(PETSC_HAVE_SPOOLES) 2043 ierr = PetscOptionsHasName(A->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr); 2044 if (flag) { 2045 if (size == 1) { 2046 ierr = 
MatUseSpooles_SeqAIJ(A);CHKERRQ(ierr); 2047 } else { 2048 ierr = MatUseSpooles_MPIAIJ(A);CHKERRQ(ierr); 2049 } 2050 } 2051 #endif 2052 #if defined(PETSC_HAVE_SUPERLUDIST) 2053 ierr = PetscOptionsHasName(A->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr); 2054 if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr); } 2055 #endif 2056 PetscFunctionReturn(0); 2057 } 2058 EXTERN_C_END 2059 2060 #undef __FUNCT__ 2061 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2062 /* 2063 Not great since it makes two copies of the submatrix, first an SeqAIJ 2064 in local and then by concatenating the local matrices the end result. 2065 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2066 */ 2067 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2068 { 2069 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2070 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2071 Mat *local,M,Mreuse; 2072 PetscScalar *vwork,*aa; 2073 MPI_Comm comm = mat->comm; 2074 Mat_SeqAIJ *aij; 2075 2076 2077 PetscFunctionBegin; 2078 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2079 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2080 2081 if (call == MAT_REUSE_MATRIX) { 2082 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2083 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 2084 local = &Mreuse; 2085 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2086 } else { 2087 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2088 Mreuse = *local; 2089 ierr = PetscFree(local);CHKERRQ(ierr); 2090 } 2091 2092 /* 2093 m - number of local rows 2094 n - number of columns (same on all processors) 2095 rstart - first row in new global matrix generated 2096 */ 2097 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2098 if (call == MAT_INITIAL_MATRIX) { 2099 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2100 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 2101 ii = aij->i; 2102 jj = aij->j; 2103 2104 /* 2105 Determine the number of non-zeros in the diagonal and off-diagonal 2106 portions of the matrix in order to do correct preallocation 2107 */ 2108 2109 /* first get start and end of "diagonal" columns */ 2110 if (csize == PETSC_DECIDE) { 2111 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2112 if (mglobal == n) { /* square matrix */ 2113 nlocal = m; 2114 } else { 2115 nlocal = n/size + ((n % size) > rank); 2116 } 2117 } else { 2118 nlocal = csize; 2119 } 2120 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2121 rstart = rend - nlocal; 2122 if (rank == size - 1 && rend != n) { 2123 SETERRQ(1,"Local column sizes do not add up to total number of columns"); 2124 } 2125 2126 /* next, compute all the lengths */ 2127 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2128 olens = dlens + m; 2129 for (i=0; i<m; i++) { 2130 jend = ii[i+1] - ii[i]; 2131 olen = 0; 2132 dlen = 0; 2133 for (j=0; j<jend; j++) { 2134 if (*jj < rstart || *jj >= rend) olen++; 2135 else dlen++; 2136 jj++; 2137 } 2138 olens[i] = olen; 2139 dlens[i] = dlen; 2140 } 2141 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2142 ierr = PetscFree(dlens);CHKERRQ(ierr); 2143 } else { 2144 int ml,nl; 2145 2146 M = *newmat; 2147 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2148 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2149 
ierr = MatZeroEntries(M);CHKERRQ(ierr); 2150 /* 2151 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2152 rather than the slower MatSetValues(). 2153 */ 2154 M->was_assembled = PETSC_TRUE; 2155 M->assembled = PETSC_FALSE; 2156 } 2157 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2158 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2159 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 2160 ii = aij->i; 2161 jj = aij->j; 2162 aa = aij->a; 2163 for (i=0; i<m; i++) { 2164 row = rstart + i; 2165 nz = ii[i+1] - ii[i]; 2166 cwork = jj; jj += nz; 2167 vwork = aa; aa += nz; 2168 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2169 } 2170 2171 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2172 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2173 *newmat = M; 2174 2175 /* save submatrix used in processor for next request */ 2176 if (call == MAT_INITIAL_MATRIX) { 2177 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2178 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2179 } 2180 2181 PetscFunctionReturn(0); 2182 } 2183 2184 #undef __FUNCT__ 2185 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2186 /*@C 2187 MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format 2188 (the default parallel PETSc format). For good matrix assembly performance 2189 the user should preallocate the matrix storage by setting the parameters 2190 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2191 performance can be increased by more than a factor of 50. 2192 2193 Collective on MPI_Comm 2194 2195 Input Parameters: 2196 + A - the matrix 2197 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2198 (same value is used for all local rows) 2199 . d_nnz - array containing the number of nonzeros in the various rows of the 2200 DIAGONAL portion of the local submatrix (possibly different for each row) 2201 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2202 The size of this array is equal to the number of local rows, i.e 'm'. 2203 You must leave room for the diagonal entry even if it is zero. 2204 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2205 submatrix (same value is used for all local rows). 2206 - o_nnz - array containing the number of nonzeros in the various rows of the 2207 OFF-DIAGONAL portion of the local submatrix (possibly different for 2208 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2209 structure. The size of this array is equal to the number 2210 of local rows, i.e 'm'. 2211 2212 The AIJ format (also called the Yale sparse matrix format or 2213 compressed row storage), is fully compatible with standard Fortran 77 2214 storage. That is, the stored row and column indices can begin at 2215 either one (as in Fortran) or zero. See the users manual for details. 2216 2217 The user MUST specify either the local or global matrix dimensions 2218 (possibly both). 2219 2220 The parallel matrix is partitioned such that the first m0 rows belong to 2221 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2222 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 
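   For instance, with local row sizes m0=3, m1=3 and m2=2 (as in the 8x8
   example further below), process 0 owns global rows 0-2, process 1 owns
   rows 3-5, and process 2 owns rows 6-7; rows are numbered from 0, as in
   MatSetValues().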
2223
2224 The DIAGONAL portion of the local submatrix of a processor can be defined
2225 as the submatrix which is obtained by extracting the part corresponding
2226 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2227 first row that belongs to the processor, and r2 is the last row belonging
2228 to this processor. This is a square mxm matrix. The remaining portion
2229 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2230
2231 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
2232
2233 By default, this format uses inodes (identical nodes) when possible.
2234 We search for consecutive rows with the same nonzero structure, thereby
2235 reusing matrix information to achieve increased efficiency.
2236
2237 Options Database Keys:
2238 + -mat_aij_no_inode - Do not use inodes
2239 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2240 - -mat_aij_oneindex - Internally use indexing starting at 1
2241 rather than 0. Note that when calling MatSetValues(),
2242 the user still MUST index entries starting at 0!
2243
2244 Example usage:
2245
2246 Consider the following 8x8 matrix with 34 non-zero values, that is
2247 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2248 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2249 as follows:
2250
2251 .vb
2252 1 2 0 | 0 3 0 | 0 4
2253 Proc0 0 5 6 | 7 0 0 | 8 0
2254 9 0 10 | 11 0 0 | 12 0
2255 -------------------------------------
2256 13 0 14 | 15 16 17 | 0 0
2257 Proc1 0 18 0 | 19 20 21 | 0 0
2258 0 0 0 | 22 23 0 | 24 0
2259 -------------------------------------
2260 Proc2 25 26 27 | 0 0 28 | 29 0
2261 30 0 0 | 31 32 33 | 0 34
2262 .ve
2263
2264 This can be represented as a collection of submatrices as:
2265
2266 .vb
2267 A B C
2268 D E F
2269 G H I
2270 .ve
2271
2272 Here the submatrices A,B,C are owned by proc0, D,E,F are
2273 owned by proc1, and G,H,I are owned by proc2.
2274
2275 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2276 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2277 The 'M','N' parameters are 8,8, and have the same values on all procs.
2278
2279 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2280 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2281 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2282 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2283 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2284 matrix, and [DF] as another SeqAIJ matrix.
2285
2286 When d_nz, o_nz parameters are specified, d_nz storage elements are
2287 allocated for every row of the local diagonal submatrix, and o_nz
2288 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2289 One way to choose d_nz and o_nz is to use the maximum number of nonzeros
2290 per local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2291 In this case, the values of d_nz,o_nz are:
2292 .vb
2293 proc0 : d_nz = 2, o_nz = 2
2294 proc1 : d_nz = 3, o_nz = 2
2295 proc2 : d_nz = 1, o_nz = 4
2296 .ve
2297 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2298 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2299 for proc2, i.e. we are using 12+15+10=37 storage locations to store
2300 34 values.
2301
2302 When d_nnz, o_nnz parameters are specified, the storage is specified
2303 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2304 In the above case the values for d_nnz,o_nnz are: 2305 .vb 2306 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2307 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2308 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2309 .ve 2310 Here the space allocated is sum of all the above values i.e 34, and 2311 hence pre-allocation is perfect. 2312 2313 Level: intermediate 2314 2315 .keywords: matrix, aij, compressed row, sparse, parallel 2316 2317 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2318 @*/ 2319 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 2320 { 2321 Mat_MPIAIJ *b; 2322 int ierr,i; 2323 PetscTruth flg2; 2324 2325 PetscFunctionBegin; 2326 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr); 2327 if (!flg2) PetscFunctionReturn(0); 2328 B->preallocated = PETSC_TRUE; 2329 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2330 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2331 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 2332 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 2333 if (d_nnz) { 2334 for (i=0; i<B->m; i++) { 2335 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 2336 } 2337 } 2338 if (o_nnz) { 2339 for (i=0; i<B->m; i++) { 2340 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 2341 } 2342 } 2343 b = (Mat_MPIAIJ*)B->data; 2344 2345 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2346 PetscLogObjectParent(B,b->A); 2347 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2348 PetscLogObjectParent(B,b->B); 2349 2350 PetscFunctionReturn(0); 2351 } 2352 2353 #undef __FUNCT__ 2354 #define __FUNCT__ "MatCreateMPIAIJ" 2355 /*@C 2356 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2357 (the default parallel PETSc format). For good matrix assembly performance 2358 the user should preallocate the matrix storage by setting the parameters 2359 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2360 performance can be increased by more than a factor of 50. 2361 2362 Collective on MPI_Comm 2363 2364 Input Parameters: 2365 + comm - MPI communicator 2366 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2367 This value should be the same as the local size used in creating the 2368 y vector for the matrix-vector product y = Ax. 2369 . n - This value should be the same as the local size used in creating the 2370 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2371 calculated if N is given) For square matrices n is almost always m. 2372 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2373 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2374 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2375 (same value is used for all local rows) 2376 . d_nnz - array containing the number of nonzeros in the various rows of the 2377 DIAGONAL portion of the local submatrix (possibly different for each row) 2378 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2379 The size of this array is equal to the number of local rows, i.e 'm'. 2380 You must leave room for the diagonal entry even if it is zero. 2381 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
2382 submatrix (same value is used for all local rows).
2383 - o_nnz - array containing the number of nonzeros in the various rows of the
2384 OFF-DIAGONAL portion of the local submatrix (possibly different for
2385 each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2386 structure. The size of this array is equal to the number
2387 of local rows, i.e. 'm'.
2388
2389 Output Parameter:
2390 . A - the matrix
2391
2392 Notes:
2393 m,n,M,N parameters specify the size of the matrix, and its partitioning across
2394 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2395 storage requirements for this matrix.
2396
2397 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2398 processor then it must be used on all processors that share the object for
2399 that argument.
2400
2401 The AIJ format (also called the Yale sparse matrix format or
2402 compressed row storage) is fully compatible with standard Fortran 77
2403 storage. That is, the stored row and column indices can begin at
2404 either one (as in Fortran) or zero. See the users manual for details.
2405
2406 The user MUST specify either the local or global matrix dimensions
2407 (possibly both).
2408
2409 The parallel matrix is partitioned such that the first m0 rows belong to
2410 process 0, the next m1 rows belong to process 1, the next m2 rows belong
2411 to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2412
2413 The DIAGONAL portion of the local submatrix of a processor can be defined
2414 as the submatrix which is obtained by extracting the part corresponding
2415 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2416 first row that belongs to the processor, and r2 is the last row belonging
2417 to this processor. This is a square mxm matrix. The remaining portion
2418 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2419
2420 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
2421
2422 By default, this format uses inodes (identical nodes) when possible.
2423 We search for consecutive rows with the same nonzero structure, thereby
2424 reusing matrix information to achieve increased efficiency.
2425
2426 Options Database Keys:
2427 + -mat_aij_no_inode - Do not use inodes
2428 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2429 - -mat_aij_oneindex - Internally use indexing starting at 1
2430 rather than 0. Note that when calling MatSetValues(),
2431 the user still MUST index entries starting at 0!
2432
2433
2434 Example usage:
2435
2436 Consider the following 8x8 matrix with 34 non-zero values, that is
2437 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2438 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2439 as follows:
2440
2441 .vb
2442 1 2 0 | 0 3 0 | 0 4
2443 Proc0 0 5 6 | 7 0 0 | 8 0
2444 9 0 10 | 11 0 0 | 12 0
2445 -------------------------------------
2446 13 0 14 | 15 16 17 | 0 0
2447 Proc1 0 18 0 | 19 20 21 | 0 0
2448 0 0 0 | 22 23 0 | 24 0
2449 -------------------------------------
2450 Proc2 25 26 27 | 0 0 28 | 29 0
2451 30 0 0 | 31 32 33 | 0 34
2452 .ve
2453
2454 This can be represented as a collection of submatrices as:
2455
2456 .vb
2457 A B C
2458 D E F
2459 G H I
2460 .ve
2461
2462 Here the submatrices A,B,C are owned by proc0, D,E,F are
2463 owned by proc1, and G,H,I are owned by proc2.
2464
2465 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2466 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2467 The 'M','N' parameters are 8,8, and have the same values on all procs.
2468
2469 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2470 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2471 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2472 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2473 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2474 matrix, and [DF] as another SeqAIJ matrix.
2475
2476 When d_nz, o_nz parameters are specified, d_nz storage elements are
2477 allocated for every row of the local diagonal submatrix, and o_nz
2478 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2479 One way to choose d_nz and o_nz is to use the maximum number of nonzeros
2480 per local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2481 In this case, the values of d_nz,o_nz are:
2482 .vb
2483 proc0 : d_nz = 2, o_nz = 2
2484 proc1 : d_nz = 3, o_nz = 2
2485 proc2 : d_nz = 1, o_nz = 4
2486 .ve
2487 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2488 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2489 for proc2, i.e. we are using 12+15+10=37 storage locations to store
2490 34 values.
2491
2492 When d_nnz, o_nnz parameters are specified, the storage is specified
2493 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2494 In the above case the values for d_nnz,o_nnz are:
2495 .vb
2496 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2497 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2498 proc2: d_nnz = [1,1] and o_nnz = [4,4]
2499 .ve
2500 Here the space allocated is the sum of all the above values, i.e. 34, and
2501 hence pre-allocation is perfect.
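   A minimal calling sequence for proc0 in the example above might look as
   follows (a sketch only: error handling is abbreviated, and each process
   would pass its own m, n, d_nnz and o_nnz values from the lists above):
.vb
      Mat A;
      int ierr,m = 3,n = 3;
      int d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,m,n,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
      ... MatSetValues() calls for the locally owned rows ...
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve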
2502 2503 Level: intermediate 2504 2505 .keywords: matrix, aij, compressed row, sparse, parallel 2506 2507 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2508 @*/ 2509 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2510 { 2511 int ierr,size; 2512 2513 PetscFunctionBegin; 2514 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2515 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2516 if (size > 1) { 2517 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2518 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2519 } else { 2520 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2521 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2522 } 2523 PetscFunctionReturn(0); 2524 } 2525 2526 #undef __FUNCT__ 2527 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2528 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap) 2529 { 2530 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2531 PetscFunctionBegin; 2532 *Ad = a->A; 2533 *Ao = a->B; 2534 *colmap = a->garray; 2535 PetscFunctionReturn(0); 2536 } 2537 2538 #undef __FUNCT__ 2539 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2540 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2541 { 2542 int ierr,i; 2543 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2544 2545 PetscFunctionBegin; 2546 if (coloring->ctype == IS_COLORING_LOCAL) { 2547 ISColoringValue *allcolors,*colors; 2548 ISColoring ocoloring; 2549 2550 /* set coloring for diagonal portion */ 2551 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2552 2553 /* set coloring for off-diagonal portion */ 2554 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2555 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2556 for (i=0; i<a->B->n; i++) { 2557 colors[i] = allcolors[a->garray[i]]; 2558 } 2559 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2560 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2561 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2562 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2563 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2564 ISColoringValue *colors; 2565 int *larray; 2566 ISColoring ocoloring; 2567 2568 /* set coloring for diagonal portion */ 2569 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2570 for (i=0; i<a->A->n; i++) { 2571 larray[i] = i + a->cstart; 2572 } 2573 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2574 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2575 for (i=0; i<a->A->n; i++) { 2576 colors[i] = coloring->colors[larray[i]]; 2577 } 2578 ierr = PetscFree(larray);CHKERRQ(ierr); 2579 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2580 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2581 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2582 2583 /* set coloring for off-diagonal portion */ 2584 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2585 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2586 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2587 for (i=0; i<a->B->n; i++) { 2588 colors[i] = coloring->colors[larray[i]]; 2589 } 2590 ierr = PetscFree(larray);CHKERRQ(ierr); 2591 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2592 ierr = 
MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2593 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2594 } else {
2595 SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
2596 }
2597
2598 PetscFunctionReturn(0);
2599 }
2600
2601 #undef __FUNCT__
2602 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2603 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2604 {
2605 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2606 int ierr;
2607
2608 PetscFunctionBegin;
2609 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2610 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2611 PetscFunctionReturn(0);
2612 }
2613
2614 #undef __FUNCT__
2615 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2616 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2617 {
2618 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2619 int ierr;
2620
2621 PetscFunctionBegin;
2622 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2623 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2624 PetscFunctionReturn(0);
2625 }
2626
2627 #undef __FUNCT__
2628 #define __FUNCT__ "MatMerge"
2629 /*@C
2630 MatMerge - Creates a single large PETSc matrix by concatenating sequential
2631 matrices from each processor
2632
2633 Collective on MPI_Comm
2634
2635 Input Parameters:
2636 + comm - the communicator the parallel matrix will live on
2637 - inmat - the input sequential matrix (one per process)
2638
2639 Output Parameter:
2640 . outmat - the parallel matrix generated
2641
2642 Level: advanced
2643
2644 Notes: The number of columns of the matrix in EACH of the separate matrices
2645 MUST be the same.
2646
2647 @*/
2648 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2649 {
2650 int ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2651 PetscScalar *values;
2652 PetscMap columnmap,rowmap;
2653
2654 PetscFunctionBegin;
2655
2656 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2657
2658 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2659 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2660 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2661 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2662 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2663 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2664
2665 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2666 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2667 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2668 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2669 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2670
2671 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2672 for (i=0;i<m;i++) {
2673 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2674 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2675 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2676 }
2677 ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr);
2678 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2679
2680 for (i=0;i<m;i++) {
2681 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2682 I = i + rstart;
2683 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2684 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2685 }
2686 ierr = MatDestroy(inmat);CHKERRQ(ierr);
2687 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2688 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2689
2690 PetscFunctionReturn(0);
2691 }
2692
2693
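/*
   Example use of MatMerge() (a sketch only; the local size mlocal, the global
   column count N and the preallocation arguments below are illustrative):

      Mat seq,par;
      ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,N,5,PETSC_NULL,&seq);CHKERRQ(ierr);
      ... set values in seq and assemble it ...
      ierr = MatMerge(PETSC_COMM_WORLD,seq,&par);CHKERRQ(ierr);

   Note that MatMerge() destroys its input matrix, so seq must not be used
   (or destroyed again) after the call; the number of columns N must be the
   same on every process.
*/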
#undef __FUNCT__
2694 #define __FUNCT__ "MatFileSplit"
2695 int MatFileSplit(Mat A,char *outfile)
2696 {
2697 int ierr,rank,len,m,N,i,rstart,*indx,nnz;
2698 PetscViewer out;
2699 char *name;
2700 Mat B;
2701 PetscScalar *values;
2702
2703 PetscFunctionBegin;
2704
2705 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2706 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2707 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
2708 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2709 for (i=0;i<m;i++) {
2710 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2711 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2712 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2713 }
2714 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2715 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2716
2717 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2718 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2719 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2720 sprintf(name,"%s.%d",outfile,rank);
2721 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
2722 ierr = PetscFree(name);CHKERRQ(ierr);
2723 ierr = MatView(B,out);CHKERRQ(ierr);
2724 ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2725 ierr = MatDestroy(B);CHKERRQ(ierr);
2726 PetscFunctionReturn(0);
2727 }
2728
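/*
   Example use of MatFileSplit() (a sketch; the base file name "Amat" is
   illustrative):

      Mat A;                ... a parallel, assembled matrix ...
      ierr = MatFileSplit(A,"Amat");CHKERRQ(ierr);

   Each process writes its local rows, as a sequential matrix, to the PETSc
   binary file Amat.<rank>, where <rank> is the process rank in the
   communicator of A.
*/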