/*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
EXTERN int DisAssemble_MPIAIJ(Mat);
EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatPrintHelp_SeqAIJ(Mat);

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
  at a slightly higher hash-table cost; without it it is not scalable (each
  processor has an order-N integer array) but it is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
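/*
   Illustrative sketch (values are made up, not part of the library): suppose
   garray = {3,7,12}, i.e. the off-diagonal block B has three local columns
   corresponding to the global columns 3, 7 and 12.  After
   CreateColmap_MPIAIJ_Private() a global column index such as 7 translates to
   local column 1 of B via

       col = aij->colmap[7] - 1;                           (array version)
   or
       PetscTableFind(aij->colmap,7+1,&col); col--;        (PETSC_USE_CTABLE version)

   The +1/-1 shifts let 0 serve as the "column not present" marker.
*/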
#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}
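/*
   Note on the two macros above: each does a short bisection followed by a
   linear scan to locate the column within the (sorted) row and, when the row
   is full and new nonzeros are allowed, reallocates the whole local matrix
   with CHUNKSIZE extra entries for that row.  The A variant works on the
   diagonal block, the B variant on the off-diagonal block; they appear to be
   hand-inlined versions of MatSetValues_SeqAIJ() (see the commented-out calls
   in MatSetValues_MPIAIJ() below), presumably to avoid per-entry function-call
   overhead.
*/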
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar value;
  int         ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int         cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth  roworiented = aij->roworiented;

  /* Some variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  int         *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar *aa = a->a;
  PetscTruth  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat         B = aij->B;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  int         *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar *ba = b->a;

  int         *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  int         nonew = a->nonew,shift=0;
  PetscScalar *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int        cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
row: %d",idxm[i]); 289 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 290 if (idxm[i] >= rstart && idxm[i] < rend) { 291 row = idxm[i] - rstart; 292 for (j=0; j<n; j++) { 293 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]); 294 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 295 if (idxn[j] >= cstart && idxn[j] < cend){ 296 col = idxn[j] - cstart; 297 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 298 } else { 299 if (!aij->colmap) { 300 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 301 } 302 #if defined (PETSC_USE_CTABLE) 303 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 304 col --; 305 #else 306 col = aij->colmap[idxn[j]] - 1; 307 #endif 308 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 309 else { 310 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 311 } 312 } 313 } 314 } else { 315 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 316 } 317 } 318 PetscFunctionReturn(0); 319 } 320 321 #undef __FUNCT__ 322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 324 { 325 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 326 int ierr,nstash,reallocs; 327 InsertMode addv; 328 329 PetscFunctionBegin; 330 if (aij->donotstash) { 331 PetscFunctionReturn(0); 332 } 333 334 /* make sure all processors are either in INSERTMODE or ADDMODE */ 335 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 336 if (addv == (ADD_VALUES|INSERT_VALUES)) { 337 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 338 } 339 mat->insertmode = addv; /* in case this processor had no cache */ 340 341 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 342 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 343 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 344 PetscFunctionReturn(0); 345 } 346 347 348 #undef __FUNCT__ 349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 351 { 352 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 353 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 354 int i,j,rstart,ncols,n,ierr,flg; 355 int *row,*col,other_disassembled; 356 PetscScalar *val; 357 InsertMode addv = mat->insertmode; 358 #if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_MUMPS) 359 PetscTruth flag; 360 #endif 361 362 PetscFunctionBegin; 363 if (!aij->donotstash) { 364 while (1) { 365 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 366 if (!flg) break; 367 368 for (i=0; i<n;) { 369 /* Now identify the consecutive vals belonging to the same row */ 370 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 371 if (j < n) ncols = j-i; 372 else ncols = n-i; 373 /* Now assemble all these values with a single function call */ 374 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 375 i = j; 376 } 377 } 378 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 379 } 380 381 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 382 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 383 384 /* determine if any processor has disassembled, if so we must 385 also disassemble ourselfs, in order 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)aij->A->data,*b = (Mat_SeqAIJ *)aij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;
#if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_MUMPS)
  PetscTruth  flag;
#endif

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

#if defined(PETSC_HAVE_SUPERLUDIST)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
#endif

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); }
#endif

#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseMUMPS_MPIAIJ(mat);CHKERRQ(ierr); }
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIAIJ  *l = (Mat_MPIAIJ*)A->data;
  int         i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int         *nprocs,j,idx,nsends,row;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;
  PetscTruth  found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     allocating new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
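/*
   The matrix-vector routines below all follow the same pattern: with the local
   rows split into the "diagonal" block A (columns owned by this processor) and
   the "off-diagonal" block B (all other columns, stored with compressed column
   numbering), a product is computed as, schematically,

       y = A*x_local + B*x_ghost

   where x_ghost (a->lvec) is filled from the other processors with the
   a->Mvctx scatter.  The scatter is started before the local product and
   finished just before the B part, so communication overlaps the local
   computation.
*/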
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in yy until the next line, which is true for this implementation
     but may not always be true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsSymmetric_MPIAIJ"
int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f)
{
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data,*Bij;
  Mat        Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS         Me,Notme;
  int        M,N,first,last,*notme,i,ierr;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *)Bmat->data; Bdia = Bij->A;
  ierr = MatIsSymmetric(Adia,Bdia,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(int),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsSymmetric(Aoff,Boff,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which is true for this implementation
     but may not always be true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);
extern int MatFactorInfo_Spooles(Mat,PetscViewer);
extern int MatFactorInfo_MUMPS(Mat,PetscViewer);
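/*
   MatView_MPIAIJ_Binary writes the matrix in the PETSc binary format: a
   four-entry integer header (MAT_FILE_COOKIE, global rows M, global columns N,
   total number of nonzeros), followed by all row lengths, then all global
   column indices, then all numerical values.  Rows are written in global
   order, with processor 0 receiving the data of the other processors and
   doing all of the file I/O.
*/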
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ  *B = (Mat_SeqAIJ*)aij->B->data;
  int         nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
  int         nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (rank == 0) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  int               ierr,rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,isascii,flg,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
#if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE)
      ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE)
      ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS) && !defined(PETSC_USE_SINGLE)
      ierr = MatFactorInfo_MUMPS(mat,viewer);CHKERRQ(ierr);
#endif
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m    = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = aij->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (isascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

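/*
   MatRelax_MPIAIJ below performs "local" SOR/relaxation: in every outer
   iteration the ghost values of xx are gathered into mat->lvec, the right
   hand side is updated as bb1 = bb - B*x_ghost (implemented by scaling lvec
   by mone = -1 and calling the multadd of the off-diagonal block), and then a
   sequential sweep is applied to the diagonal block A only.  A true parallel
   SOR, coupling processors within a single sweep, is not supported.
*/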
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  int         ierr;
  Vec         bb1;
  PetscScalar mone=-1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Mat        A = mat->A,B = mat->B;
  int        ierr;
  PetscReal  isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int         i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int         nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int         *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int        max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
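/*
   MatNorm_MPIAIJ below computes, with a(i,j) running over the locally stored
   entries of both the diagonal and off-diagonal blocks,

     NORM_FROBENIUS:  sqrt( sum_ij |a(i,j)|^2 )   (MPI_SUM reduction, then sqrt)
     NORM_1:          max_j sum_i |a(i,j)|        (max column sum: MPI_SUM on the
                                                   per-column sums, then local max)
     NORM_INFINITY:   max_i sum_j |a(i,j)|        (max row sum: MPI_MAX reduction)
*/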
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  int         ierr,i,j,cstart = aij->cstart;
  PetscReal   sum = 0.0;
  PetscScalar *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      int       *jj,*garray = aij->garray;
      ierr  = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr  = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ  *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ  *Aloc = (Mat_SeqAIJ*)a->A->data;
  int         ierr;
  int         M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat         B;
  PetscScalar *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m    = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m    = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
  ct   = cols;
  for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        a = aij->A,b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIAIJ"
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat        a,b,c,d;
  PetscTruth flg;
  int        ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts.  If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
int MatSetUpPreallocation_MPIAIJ(Mat A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
#if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*);
#endif

#include "petscblaslapack.h"
extern int MatAXPY_SeqAIJ(const PetscScalar[],Mat,Mat,MatStructure);
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
{
  int        ierr,one=1,i;
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    x = (Mat_SeqAIJ *)xx->A->data;
    y = (Mat_SeqAIJ *)yy->A->data;
    BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
    }
    for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
  } else {
    ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
       MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
       MatZeroRows_MPIAIJ,
#if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
       MatLUFactorSymbolic_MPIAIJ_TFS,
#else
       0,
#endif
       0,
       0,
       0,
       MatSetUpPreallocation_MPIAIJ,
       0,
       0,
       0,
       0,
       MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
       MatAXPY_MPIAIJ,
1646 MatGetSubMatrices_MPIAIJ, 1647 MatIncreaseOverlap_MPIAIJ, 1648 MatGetValues_MPIAIJ, 1649 MatCopy_MPIAIJ, 1650 MatPrintHelp_MPIAIJ, 1651 MatScale_MPIAIJ, 1652 0, 1653 0, 1654 0, 1655 MatGetBlockSize_MPIAIJ, 1656 0, 1657 0, 1658 0, 1659 0, 1660 MatFDColoringCreate_MPIAIJ, 1661 0, 1662 MatSetUnfactored_MPIAIJ, 1663 0, 1664 0, 1665 MatGetSubMatrix_MPIAIJ, 1666 MatDestroy_MPIAIJ, 1667 MatView_MPIAIJ, 1668 MatGetPetscMaps_Petsc, 1669 0, 1670 0, 1671 0, 1672 0, 1673 0, 1674 0, 1675 0, 1676 0, 1677 MatSetColoring_MPIAIJ, 1678 MatSetValuesAdic_MPIAIJ, 1679 MatSetValuesAdifor_MPIAIJ 1680 }; 1681 1682 /* ----------------------------------------------------------------------------------------*/ 1683 1684 EXTERN_C_BEGIN 1685 #undef __FUNCT__ 1686 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1687 int MatStoreValues_MPIAIJ(Mat mat) 1688 { 1689 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1690 int ierr; 1691 1692 PetscFunctionBegin; 1693 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1694 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1695 PetscFunctionReturn(0); 1696 } 1697 EXTERN_C_END 1698 1699 EXTERN_C_BEGIN 1700 #undef __FUNCT__ 1701 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1702 int MatRetrieveValues_MPIAIJ(Mat mat) 1703 { 1704 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1705 int ierr; 1706 1707 PetscFunctionBegin; 1708 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1709 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1710 PetscFunctionReturn(0); 1711 } 1712 EXTERN_C_END 1713 1714 #include "petscpc.h" 1715 EXTERN_C_BEGIN 1716 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1717 EXTERN int MatDiagonalScaleLocal_MPIAIJ(Mat,Vec); 1718 EXTERN_C_END 1719 1720 EXTERN_C_BEGIN 1721 #undef __FUNCT__ 1722 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1723 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1724 { 1725 Mat_MPIAIJ *b; 1726 int ierr,i; 1727 1728 PetscFunctionBegin; 1729 B->preallocated = PETSC_TRUE; 1730 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1731 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1732 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1733 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1734 if (d_nnz) { 1735 for (i=0; i<B->m; i++) { 1736 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 1737 } 1738 } 1739 if (o_nnz) { 1740 for (i=0; i<B->m; i++) { 1741 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 1742 } 1743 } 1744 b = (Mat_MPIAIJ*)B->data; 1745 1746 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1747 PetscLogObjectParent(B,b->A); 1748 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1749 PetscLogObjectParent(B,b->B); 1750 1751 PetscFunctionReturn(0); 1752 } 1753 EXTERN_C_END 1754 1755 EXTERN_C_BEGIN 1756 #undef __FUNCT__ 1757 #define __FUNCT__ "MatCreate_MPIAIJ" 1758 int MatCreate_MPIAIJ(Mat B) 1759 { 1760 Mat_MPIAIJ *b; 1761 int ierr,i,size; 1762 1763 PetscFunctionBegin; 1764 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1765 1766 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1767 B->data = (void*)b; 1768 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1769 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1770 B->factor = 0; 1771 B->assembled = 
PETSC_FALSE; 1772 B->mapping = 0; 1773 1774 B->insertmode = NOT_SET_VALUES; 1775 b->size = size; 1776 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1777 1778 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1779 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1780 1781 /* the information in the maps duplicates the information computed below, eventually 1782 we should remove the duplicate information that is not contained in the maps */ 1783 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1784 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1785 1786 /* build local table of row and column ownerships */ 1787 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1788 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1789 b->cowners = b->rowners + b->size + 2; 1790 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1791 b->rowners[0] = 0; 1792 for (i=2; i<=b->size; i++) { 1793 b->rowners[i] += b->rowners[i-1]; 1794 } 1795 b->rstart = b->rowners[b->rank]; 1796 b->rend = b->rowners[b->rank+1]; 1797 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1798 b->cowners[0] = 0; 1799 for (i=2; i<=b->size; i++) { 1800 b->cowners[i] += b->cowners[i-1]; 1801 } 1802 b->cstart = b->cowners[b->rank]; 1803 b->cend = b->cowners[b->rank+1]; 1804 1805 /* build cache for off array entries formed */ 1806 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1807 b->donotstash = PETSC_FALSE; 1808 b->colmap = 0; 1809 b->garray = 0; 1810 b->roworiented = PETSC_TRUE; 1811 1812 /* stuff used for matrix vector multiply */ 1813 b->lvec = PETSC_NULL; 1814 b->Mvctx = PETSC_NULL; 1815 1816 /* stuff for MatGetRow() */ 1817 b->rowindices = 0; 1818 b->rowvalues = 0; 1819 b->getrowactive = PETSC_FALSE; 1820 1821 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1822 "MatStoreValues_MPIAIJ", 1823 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1824 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1825 "MatRetrieveValues_MPIAIJ", 1826 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1827 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1828 "MatGetDiagonalBlock_MPIAIJ", 1829 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1830 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C", 1831 "MatIsSymmetric_MPIAIJ", 1832 MatIsSymmetric_MPIAIJ); CHKERRQ(ierr); 1833 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 1834 "MatMPIAIJSetPreallocation_MPIAIJ", 1835 MatMPIAIJSetPreallocation_MPIAIJ); CHKERRQ(ierr); 1836 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 1837 "MatDiagonalScaleLocal_MPIAIJ", 1838 MatDiagonalScaleLocal_MPIAIJ); CHKERRQ(ierr); 1839 PetscFunctionReturn(0); 1840 } 1841 EXTERN_C_END 1842 1843 #undef __FUNCT__ 1844 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1845 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1846 { 1847 Mat mat; 1848 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1849 int ierr; 1850 1851 PetscFunctionBegin; 1852 *newmat = 0; 1853 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1854 ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1855 a = (Mat_MPIAIJ*)mat->data; 1856 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1857 mat->factor = 
matin->factor; 1858 mat->assembled = PETSC_TRUE; 1859 mat->insertmode = NOT_SET_VALUES; 1860 mat->preallocated = PETSC_TRUE; 1861 1862 a->rstart = oldmat->rstart; 1863 a->rend = oldmat->rend; 1864 a->cstart = oldmat->cstart; 1865 a->cend = oldmat->cend; 1866 a->size = oldmat->size; 1867 a->rank = oldmat->rank; 1868 a->donotstash = oldmat->donotstash; 1869 a->roworiented = oldmat->roworiented; 1870 a->rowindices = 0; 1871 a->rowvalues = 0; 1872 a->getrowactive = PETSC_FALSE; 1873 1874 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1875 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1876 if (oldmat->colmap) { 1877 #if defined (PETSC_USE_CTABLE) 1878 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1879 #else 1880 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1881 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1882 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1883 #endif 1884 } else a->colmap = 0; 1885 if (oldmat->garray) { 1886 int len; 1887 len = oldmat->B->n; 1888 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1889 PetscLogObjectMemory(mat,len*sizeof(int)); 1890 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1891 } else a->garray = 0; 1892 1893 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1894 PetscLogObjectParent(mat,a->lvec); 1895 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1896 PetscLogObjectParent(mat,a->Mvctx); 1897 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1898 PetscLogObjectParent(mat,a->A); 1899 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1900 PetscLogObjectParent(mat,a->B); 1901 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1902 *newmat = mat; 1903 PetscFunctionReturn(0); 1904 } 1905 1906 #include "petscsys.h" 1907 1908 EXTERN_C_BEGIN 1909 #undef __FUNCT__ 1910 #define __FUNCT__ "MatLoad_MPIAIJ" 1911 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat) 1912 { 1913 Mat A; 1914 PetscScalar *vals,*svals; 1915 MPI_Comm comm = ((PetscObject)viewer)->comm; 1916 MPI_Status status; 1917 int i,nz,ierr,j,rstart,rend,fd; 1918 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1919 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1920 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1921 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_MUMPS) 1922 PetscTruth flag; 1923 #endif 1924 1925 PetscFunctionBegin; 1926 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1927 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1928 if (!rank) { 1929 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1930 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1931 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1932 if (header[3] < 0) { 1933 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1934 } 1935 } 1936 1937 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1938 M = header[1]; N = header[2]; 1939 /* determine ownership of all rows */ 1940 m = M/size + ((M % size) > rank); 1941 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1942 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1943 rowners[0] = 0; 1944 for (i=2; i<=size; i++) { 1945 rowners[i] += rowners[i-1]; 
1946 } 1947 rstart = rowners[rank]; 1948 rend = rowners[rank+1]; 1949 1950 /* distribute row lengths to all processors */ 1951 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1952 offlens = ourlens + (rend-rstart); 1953 if (!rank) { 1954 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1955 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1956 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1957 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1958 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1959 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1960 } else { 1961 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1962 } 1963 1964 if (!rank) { 1965 /* calculate the number of nonzeros on each processor */ 1966 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1967 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1968 for (i=0; i<size; i++) { 1969 for (j=rowners[i]; j< rowners[i+1]; j++) { 1970 procsnz[i] += rowlengths[j]; 1971 } 1972 } 1973 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1974 1975 /* determine max buffer needed and allocate it */ 1976 maxnz = 0; 1977 for (i=0; i<size; i++) { 1978 maxnz = PetscMax(maxnz,procsnz[i]); 1979 } 1980 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1981 1982 /* read in my part of the matrix column indices */ 1983 nz = procsnz[0]; 1984 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1985 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1986 1987 /* read in every one elses and ship off */ 1988 for (i=1; i<size; i++) { 1989 nz = procsnz[i]; 1990 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1991 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1992 } 1993 ierr = PetscFree(cols);CHKERRQ(ierr); 1994 } else { 1995 /* determine buffer space needed for message */ 1996 nz = 0; 1997 for (i=0; i<m; i++) { 1998 nz += ourlens[i]; 1999 } 2000 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 2001 2002 /* receive message of column indices*/ 2003 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2004 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2005 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2006 } 2007 2008 /* determine column ownership if matrix is not square */ 2009 if (N != M) { 2010 n = N/size + ((N % size) > rank); 2011 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2012 cstart = cend - n; 2013 } else { 2014 cstart = rstart; 2015 cend = rend; 2016 n = cend - cstart; 2017 } 2018 2019 /* loop over local rows, determining number of off diagonal entries */ 2020 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 2021 jj = 0; 2022 for (i=0; i<m; i++) { 2023 for (j=0; j<ourlens[i]; j++) { 2024 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2025 jj++; 2026 } 2027 } 2028 2029 /* create our matrix */ 2030 for (i=0; i<m; i++) { 2031 ourlens[i] -= offlens[i]; 2032 } 2033 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 2034 A = *newmat; 2035 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2036 for (i=0; i<m; i++) { 2037 ourlens[i] += offlens[i]; 2038 } 2039 2040 if (!rank) { 2041 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2042 2043 /* read in my part of the matrix numerical values */ 2044 nz = procsnz[0]; 2045 ierr = 
PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2046 2047 /* insert into matrix */ 2048 jj = rstart; 2049 smycols = mycols; 2050 svals = vals; 2051 for (i=0; i<m; i++) { 2052 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2053 smycols += ourlens[i]; 2054 svals += ourlens[i]; 2055 jj++; 2056 } 2057 2058 /* read in other processors and ship out */ 2059 for (i=1; i<size; i++) { 2060 nz = procsnz[i]; 2061 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2062 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2063 } 2064 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2065 } else { 2066 /* receive numeric values */ 2067 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2068 2069 /* receive message of values*/ 2070 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2071 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2072 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2073 2074 /* insert into matrix */ 2075 jj = rstart; 2076 smycols = mycols; 2077 svals = vals; 2078 for (i=0; i<m; i++) { 2079 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2080 smycols += ourlens[i]; 2081 svals += ourlens[i]; 2082 jj++; 2083 } 2084 } 2085 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2086 ierr = PetscFree(vals);CHKERRQ(ierr); 2087 ierr = PetscFree(mycols);CHKERRQ(ierr); 2088 ierr = PetscFree(rowners);CHKERRQ(ierr); 2089 2090 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2091 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2092 #if defined(PETSC_HAVE_SPOOLES) 2093 ierr = PetscOptionsHasName(A->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr); 2094 if (flag) { 2095 if (size == 1) { 2096 ierr = MatUseSpooles_SeqAIJ(A);CHKERRQ(ierr); 2097 } else { 2098 ierr = MatUseSpooles_MPIAIJ(A);CHKERRQ(ierr); 2099 } 2100 } 2101 #endif 2102 #if defined(PETSC_HAVE_SUPERLUDIST) 2103 ierr = PetscOptionsHasName(A->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr); 2104 if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr); } 2105 #endif 2106 #if defined(PETSC_HAVE_MUMPS) 2107 ierr = PetscOptionsHasName(A->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr); 2108 if (flag) { ierr = MatUseMUMPS_MPIAIJ(A);CHKERRQ(ierr); } 2109 #endif 2110 PetscFunctionReturn(0); 2111 } 2112 EXTERN_C_END 2113 2114 #undef __FUNCT__ 2115 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2116 /* 2117 Not great since it makes two copies of the submatrix, first an SeqAIJ 2118 in local and then by concatenating the local matrices the end result. 
2119 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2120 */ 2121 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2122 { 2123 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2124 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2125 Mat *local,M,Mreuse; 2126 PetscScalar *vwork,*aa; 2127 MPI_Comm comm = mat->comm; 2128 Mat_SeqAIJ *aij; 2129 2130 2131 PetscFunctionBegin; 2132 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2133 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2134 2135 if (call == MAT_REUSE_MATRIX) { 2136 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2137 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 2138 local = &Mreuse; 2139 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2140 } else { 2141 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2142 Mreuse = *local; 2143 ierr = PetscFree(local);CHKERRQ(ierr); 2144 } 2145 2146 /* 2147 m - number of local rows 2148 n - number of columns (same on all processors) 2149 rstart - first row in new global matrix generated 2150 */ 2151 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2152 if (call == MAT_INITIAL_MATRIX) { 2153 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2154 ii = aij->i; 2155 jj = aij->j; 2156 2157 /* 2158 Determine the number of non-zeros in the diagonal and off-diagonal 2159 portions of the matrix in order to do correct preallocation 2160 */ 2161 2162 /* first get start and end of "diagonal" columns */ 2163 if (csize == PETSC_DECIDE) { 2164 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2165 if (mglobal == n) { /* square matrix */ 2166 nlocal = m; 2167 } else { 2168 nlocal = n/size + ((n % size) > rank); 2169 } 2170 } else { 2171 nlocal = csize; 2172 } 2173 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2174 rstart = rend - nlocal; 2175 if (rank == size - 1 && rend != n) { 2176 SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n); 2177 } 2178 2179 /* next, compute all the lengths */ 2180 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2181 olens = dlens + m; 2182 for (i=0; i<m; i++) { 2183 jend = ii[i+1] - ii[i]; 2184 olen = 0; 2185 dlen = 0; 2186 for (j=0; j<jend; j++) { 2187 if (*jj < rstart || *jj >= rend) olen++; 2188 else dlen++; 2189 jj++; 2190 } 2191 olens[i] = olen; 2192 dlens[i] = dlen; 2193 } 2194 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2195 ierr = PetscFree(dlens);CHKERRQ(ierr); 2196 } else { 2197 int ml,nl; 2198 2199 M = *newmat; 2200 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2201 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2202 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2203 /* 2204 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2205 rather than the slower MatSetValues(). 
   */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure.  The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0, m1, m2, ... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor.  This is a square mxm matrix.  The remaining portion of
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors.  Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows.  This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   The submatrices A,B,C are owned by proc0, D,E,F are owned by proc1,
   and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively.  The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices.  For example, proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc.  This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e., 34,
   and hence the preallocation is perfect.
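
   For the 8x8 example above, a minimal (purely illustrative) call sequence on
   proc0 might look as follows; the variable names are placeholders and only the
   proc0 values of d_nnz/o_nnz listed above are shown:

.vb
     Mat A;
     int d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};

     MatCreate(comm,3,3,8,8,&A);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
     /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve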

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
{
  int ierr,(*f)(Mat,int,const int[],int,const int[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.  For square matrices n is
       almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure.  The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).
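
   As a purely illustrative sketch (the communicator, sizes, and preallocation
   values here are placeholders), either of the following calls is legal; in the
   first the global sizes are derived from the local ones, and in the second the
   local sizes are chosen by PETSc:

.vb
     MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,d_nnz,0,o_nnz,&A);
     MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);
.ve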

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0, m1, m2, ... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor.  This is a square mxm matrix.  The remaining portion of
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors.  Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows.  This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   The submatrices A,B,C are owned by proc0, D,E,F are owned by proc1,
   and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively.  The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices.  For example, proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc.  This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e., 34,
   and hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  int        ierr,i;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_LOCAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    int             *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      larray[i] = i + a->cstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@C
   MatMerge - Creates a single large parallel PETSc matrix by concatenating the
   sequential matrices provided by each processor

   Collective on MPI_Comm

   Input Parameters:
+  comm - the communicator on which the parallel matrix will live
-  inmat - the input sequential matrix on each processor

   Output Parameter:
.  outmat - the parallel matrix generated

   Level: advanced

   Notes: The number of columns of the matrix in EACH of the separate input
   matrices MUST be the same.
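
   As currently implemented, the input matrix inmat is destroyed by this routine
   (MatDestroy() is called on it), so it cannot be used after the call.

   A minimal, purely illustrative call sequence (the sizes m, N, and comm are
   placeholders):

.vb
     Mat local,global;

     MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,PETSC_NULL,&local);
     /* ... fill local with MatSetValues() and assemble it ... */
     MatMerge(comm,local,&global);   /* local is destroyed by this call */
.ve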
2681 2682 @*/ 2683 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat) 2684 { 2685 int ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz; 2686 PetscScalar *values; 2687 PetscMap columnmap,rowmap; 2688 2689 PetscFunctionBegin; 2690 2691 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr); 2692 2693 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2694 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2695 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr); 2696 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2697 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2698 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2699 2700 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2701 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2702 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2703 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2704 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2705 2706 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2707 for (i=0;i<m;i++) { 2708 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2709 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2710 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2711 } 2712 ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr); 2713 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2714 2715 for (i=0;i<m;i++) { 2716 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2717 I = i + rstart; 2718 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2719 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2720 } 2721 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2722 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2723 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2724 2725 PetscFunctionReturn(0); 2726 } 2727 2728 #undef __FUNCT__ 2729 #define __FUNCT__ "MatFileSplit" 2730 int MatFileSplit(Mat A,char *outfile) 2731 { 2732 int ierr,rank,len,m,N,i,rstart,*indx,nnz; 2733 PetscViewer out; 2734 char *name; 2735 Mat B; 2736 PetscScalar *values; 2737 2738 PetscFunctionBegin; 2739 2740 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2741 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2742 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr); 2743 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2744 for (i=0;i<m;i++) { 2745 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2746 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2747 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2748 } 2749 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2750 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2751 2752 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2753 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2754 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2755 sprintf(name,"%s.%d",outfile,rank); 2756 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr); 2757 ierr = PetscFree(name); 2758 ierr = MatView(B,out);CHKERRQ(ierr); 2759 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2760 ierr = MatDestroy(B);CHKERRQ(ierr); 2761 PetscFunctionReturn(0); 2762 } 2763
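
/* --------------------------------------------------------------------------
   The routine MatMPIAIJGetSeqAIJ() defined above has no manual page in this
   file, so the following sketch illustrates how its outputs are typically
   used.  It is kept inside a comment and is not compiled; the routine name
   ShowMPIAIJParts() is purely hypothetical.  Ad and Ao are the per-process
   DIAGONAL and OFF-DIAGONAL SeqAIJ pieces of an assembled MPIAIJ matrix,
   and colmap[j] is the global column corresponding to local column j of Ao.

     int ShowMPIAIJParts(Mat A)
     {
       Mat Ad,Ao;
       int ierr,j,m,nd,no,*colmap;

       PetscFunctionBegin;
       ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
       ierr = MatGetSize(Ad,&m,&nd);CHKERRQ(ierr);
       ierr = MatGetSize(Ao,&m,&no);CHKERRQ(ierr);
       ierr = PetscPrintf(PETSC_COMM_SELF,"local rows %d diag cols %d off-diag cols %d\n",m,nd,no);CHKERRQ(ierr);
       for (j=0; j<no; j++) {
         ierr = PetscPrintf(PETSC_COMM_SELF,"local column %d of Ao is global column %d\n",j,colmap[j]);CHKERRQ(ierr);
       }
       PetscFunctionReturn(0);
     }
   -------------------------------------------------------------------------- */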