1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/ 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/spops.h" 6 7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat); 8 EXTERN int DisAssemble_MPIAIJ(Mat); 9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 12 EXTERN int MatPrintHelp_SeqAIJ(Mat); 13 14 /* 15 Local utility routine that creates a mapping from the global column 16 number to the local number in the off-diagonal part of the local 17 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 18 a slightly higher hash table cost; without it it is not scalable (each processor 19 has an order N integer array but is fast to acess. 20 */ 21 #undef __FUNCT__ 22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 23 int CreateColmap_MPIAIJ_Private(Mat mat) 24 { 25 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 26 int n = aij->B->n,i,ierr; 27 28 PetscFunctionBegin; 29 #if defined (PETSC_USE_CTABLE) 30 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 31 for (i=0; i<n; i++){ 32 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 33 } 34 #else 35 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 36 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 37 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 38 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 39 #endif 40 PetscFunctionReturn(0); 41 } 42 43 #define CHUNKSIZE 15 44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 45 { \ 46 \ 47 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 48 rmax = aimax[row]; nrow = ailen[row]; \ 49 col1 = col - shift; \ 50 \ 51 low = 0; high = nrow; \ 52 while (high-low > 5) { \ 53 t = (low+high)/2; \ 54 if (rp[t] > col) high = t; \ 55 else low = t; \ 56 } \ 57 for (_i=low; _i<high; _i++) { \ 58 if (rp[_i] > col1) break; \ 59 if (rp[_i] == col1) { \ 60 if (addv == ADD_VALUES) ap[_i] += value; \ 61 else ap[_i] = value; \ 62 goto a_noinsert; \ 63 } \ 64 } \ 65 if (nonew == 1) goto a_noinsert; \ 66 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 67 if (nrow >= rmax) { \ 68 /* there is no extra room in row, therefore enlarge */ \ 69 int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 70 PetscScalar *new_a; \ 71 \ 72 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 73 \ 74 /* malloc new storage space */ \ 75 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \ 76 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 77 new_j = (int*)(new_a + new_nz); \ 78 new_i = new_j + new_nz; \ 79 \ 80 /* copy over old data into new slots */ \ 81 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 82 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 83 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 84 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 85 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 86 len*sizeof(int));CHKERRQ(ierr); \ 87 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 88 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 89 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 90 /* free up old matrix storage */ \ 91 \ 92 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 93 if 
(!a->singlemalloc) { \ 94 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 95 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 96 } \ 97 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 98 a->singlemalloc = PETSC_TRUE; \ 99 \ 100 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 101 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 102 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 103 a->maxnz += CHUNKSIZE; \ 104 a->reallocs++; \ 105 } \ 106 N = nrow++ - 1; a->nz++; \ 107 /* shift up all the later entries in this row */ \ 108 for (ii=N; ii>=_i; ii--) { \ 109 rp[ii+1] = rp[ii]; \ 110 ap[ii+1] = ap[ii]; \ 111 } \ 112 rp[_i] = col1; \ 113 ap[_i] = value; \ 114 a_noinsert: ; \ 115 ailen[row] = nrow; \ 116 } 117 118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 119 { \ 120 \ 121 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 122 rmax = bimax[row]; nrow = bilen[row]; \ 123 col1 = col - shift; \ 124 \ 125 low = 0; high = nrow; \ 126 while (high-low > 5) { \ 127 t = (low+high)/2; \ 128 if (rp[t] > col) high = t; \ 129 else low = t; \ 130 } \ 131 for (_i=low; _i<high; _i++) { \ 132 if (rp[_i] > col1) break; \ 133 if (rp[_i] == col1) { \ 134 if (addv == ADD_VALUES) ap[_i] += value; \ 135 else ap[_i] = value; \ 136 goto b_noinsert; \ 137 } \ 138 } \ 139 if (nonew == 1) goto b_noinsert; \ 140 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 141 if (nrow >= rmax) { \ 142 /* there is no extra room in row, therefore enlarge */ \ 143 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 144 PetscScalar *new_a; \ 145 \ 146 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 147 \ 148 /* malloc new storage space */ \ 149 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 150 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 151 new_j = (int*)(new_a + new_nz); \ 152 new_i = new_j + new_nz; \ 153 \ 154 /* copy over old data into new slots */ \ 155 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 156 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 157 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 158 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 159 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 160 len*sizeof(int));CHKERRQ(ierr); \ 161 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 163 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 164 /* free up old matrix storage */ \ 165 \ 166 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 167 if (!b->singlemalloc) { \ 168 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 169 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 170 } \ 171 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 172 b->singlemalloc = PETSC_TRUE; \ 173 \ 174 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 175 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 176 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 177 b->maxnz += CHUNKSIZE; \ 178 b->reallocs++; \ 179 } \ 180 N = nrow++ - 1; b->nz++; \ 181 /* shift up all the later entries in this row */ \ 182 for (ii=N; ii>=_i; ii--) { \ 183 rp[ii+1] = rp[ii]; \ 184 ap[ii+1] = ap[ii]; \ 185 } \ 186 rp[_i] = col1; \ 187 ap[_i] = value; \ 188 b_noinsert: ; \ 189 bilen[row] = nrow; \ 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatSetValues_MPIAIJ" 194 int MatSetValues_MPIAIJ(Mat 
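/*
   Note (illustrative sketch only; not a statement of the PETSc API): the two insertion
   macros above and MatSetValues_MPIAIJ() below route each (row,col) entry either into
   the "diagonal" sequential block aij->A (columns owned by this processor) or into the
   compressed "off-diagonal" block aij->B, addressed through the colmap built by
   CreateColmap_MPIAIJ_Private().  Roughly, for a locally owned row:

       if (col >= cstart && col < cend) {
         lcol = col - cstart;          // local column in the diagonal block A
         ... insert into A ...
       } else {
         lcol = colmap[col] - 1;       // global -> local column of block B (array variant)
         ... insert into B, or disassemble B if the column was not preallocated ...
       }

   The +1/-1 offset lets a zero colmap entry mean "this column does not occur in B".
*/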
mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 195 { 196 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 197 PetscScalar value; 198 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 199 int cstart = aij->cstart,cend = aij->cend,row,col; 200 PetscTruth roworiented = aij->roworiented; 201 202 /* Some Variables required in the macro */ 203 Mat A = aij->A; 204 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 205 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 206 PetscScalar *aa = a->a; 207 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 208 Mat B = aij->B; 209 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 210 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 211 PetscScalar *ba = b->a; 212 213 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 214 int nonew = a->nonew,shift=0; 215 PetscScalar *ap; 216 217 PetscFunctionBegin; 218 for (i=0; i<m; i++) { 219 if (im[i] < 0) continue; 220 #if defined(PETSC_USE_BOPT_g) 221 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 222 #endif 223 if (im[i] >= rstart && im[i] < rend) { 224 row = im[i] - rstart; 225 for (j=0; j<n; j++) { 226 if (in[j] >= cstart && in[j] < cend){ 227 col = in[j] - cstart; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 if (ignorezeroentries && value == 0.0) continue; 230 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } else if (in[j] < 0) continue; 233 #if defined(PETSC_USE_BOPT_g) 234 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");} 235 #endif 236 else { 237 if (mat->was_assembled) { 238 if (!aij->colmap) { 239 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 240 } 241 #if defined (PETSC_USE_CTABLE) 242 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 243 col--; 244 #else 245 col = aij->colmap[in[j]] - 1; 246 #endif 247 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 248 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 249 col = in[j]; 250 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 251 B = aij->B; 252 b = (Mat_SeqAIJ*)B->data; 253 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 254 ba = b->a; 255 } 256 } else col = in[j]; 257 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 258 if (ignorezeroentries && value == 0.0) continue; 259 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 260 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 261 } 262 } 263 } else { 264 if (!aij->donotstash) { 265 if (roworiented) { 266 if (ignorezeroentries && v[i*n] == 0.0) continue; 267 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 268 } else { 269 if (ignorezeroentries && v[i] == 0.0) continue; 270 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 271 } 272 } 273 } 274 } 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatGetValues_MPIAIJ" 280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 281 { 282 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 283 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 284 int cstart = aij->cstart,cend = aij->cend,row,col; 285 286 PetscFunctionBegin; 287 for (i=0; i<m; i++) { 288 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 289 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 290 if (idxm[i] 
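/*
   Note: a hedged usage sketch (variable names here are placeholders, not from this file)
   of the calling pattern MatSetValues_MPIAIJ() above supports.  Rows owned by other
   processors are not sent immediately; they go into mat->stash and travel during
   assembly, so a sequence of MatSetValues() calls must be closed by the assembly pair:

       PetscScalar one = 1.0;
       int         row = ...,col = ...;   // global indices, possibly off-process
       ierr = MatSetValues(mat,1,&row,1,&col,&one,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   MAT_IGNORE_OFF_PROC_ENTRIES (aij->donotstash) skips the stash entirely, and mixing
   INSERT_VALUES with ADD_VALUES across processors is caught in MatAssemblyBegin_MPIAIJ()
   by the MPI_BOR reduction of insertmode.
*/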
>= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,nstash,reallocs;
  InsertMode addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)aij->A->data,*b = (Mat_SeqAIJ *)aij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;
#if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_MUMPS)
  PetscTruth  flag;
#endif

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble.
*/ 386 /* 387 if nonzero structure of submatrix B cannot change then we know that 388 no processor disassembled thus we can skip this stuff 389 */ 390 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 391 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 392 if (mat->was_assembled && !other_disassembled) { 393 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 394 } 395 } 396 397 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 398 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 399 } 400 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 401 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 402 403 if (aij->rowvalues) { 404 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 405 aij->rowvalues = 0; 406 } 407 408 /* used by MatAXPY() */ 409 a->xtoy = 0; b->xtoy = 0; 410 a->XtoY = 0; b->XtoY = 0; 411 412 #if defined(PETSC_HAVE_SUPERLUDIST) 413 ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr); 414 if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); } 415 #endif 416 417 #if defined(PETSC_HAVE_SPOOLES) 418 ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr); 419 if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); } 420 #endif 421 422 #if defined(PETSC_HAVE_MUMPS) 423 ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr); 424 if (flag) { ierr = MatUseMUMPS_MPIAIJ(mat);CHKERRQ(ierr); } 425 #endif 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 431 int MatZeroEntries_MPIAIJ(Mat A) 432 { 433 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 434 int ierr; 435 436 PetscFunctionBegin; 437 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 438 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 439 PetscFunctionReturn(0); 440 } 441 442 #undef __FUNCT__ 443 #define __FUNCT__ "MatZeroRows_MPIAIJ" 444 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag) 445 { 446 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 447 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 448 int *nprocs,j,idx,nsends,row; 449 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 450 int *rvalues,tag = A->tag,count,base,slen,n,*source; 451 int *lens,imdex,*lrows,*values,rstart=l->rstart; 452 MPI_Comm comm = A->comm; 453 MPI_Request *send_waits,*recv_waits; 454 MPI_Status recv_status,*send_status; 455 IS istmp; 456 PetscTruth found; 457 458 PetscFunctionBegin; 459 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 460 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 461 462 /* first count number of contributors to each processor */ 463 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 464 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 465 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 466 for (i=0; i<N; i++) { 467 idx = rows[i]; 468 found = PETSC_FALSE; 469 for (j=0; j<size; j++) { 470 if (idx >= owners[j] && idx < owners[j+1]) { 471 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 472 } 473 } 474 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 475 } 476 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 477 478 /* inform other processors of number of messages and max length*/ 479 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 480 481 /* post receives: */ 482 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 483 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 484 for (i=0; i<nrecvs; 
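/*
   Note (a rough sketch of the pattern used above, not additional source code): the
   communication set-up in MatZeroRows_MPIAIJ follows the usual PETSc counts-and-offsets
   scheme.  nprocs[2*j] counts how many of the requested rows go to processor j,
   nprocs[2*j+1] is a 0/1 flag marking that a message to j is needed, and the send
   buffer offsets are the prefix sums of the counts:

       starts[0] = 0;
       for (j=1; j<size; j++) starts[j] = starts[j-1] + nprocs[2*(j-1)];
       for (i=0; i<N; i++)    svalues[starts[owner[i]]++] = rows[i];

   PetscMaxSum() then tells every processor the maximum message length (nmax) and how
   many messages it will receive (nrecvs), so the receives below can be posted with
   MPI_ANY_SOURCE into fixed-size slots of nmax integers each.
*/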
i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry, allocating
     new memory and thus calling lots of mallocs and slowing things down.
552 553 Contributed by: Mathew Knepley 554 */ 555 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 556 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 557 if (diag && (l->A->M == l->A->N)) { 558 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 559 } else if (diag) { 560 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 561 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 562 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 563 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 564 } 565 for (i = 0; i < slen; i++) { 566 row = lrows[i] + rstart; 567 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 568 } 569 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 570 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 571 } else { 572 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 573 } 574 ierr = ISDestroy(istmp);CHKERRQ(ierr); 575 ierr = PetscFree(lrows);CHKERRQ(ierr); 576 577 /* wait on sends */ 578 if (nsends) { 579 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 580 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 581 ierr = PetscFree(send_status);CHKERRQ(ierr); 582 } 583 ierr = PetscFree(send_waits);CHKERRQ(ierr); 584 ierr = PetscFree(svalues);CHKERRQ(ierr); 585 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatMult_MPIAIJ" 591 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 592 { 593 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 594 int ierr,nt; 595 596 PetscFunctionBegin; 597 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 598 if (nt != A->n) { 599 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 600 } 601 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 602 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 603 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 604 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 605 PetscFunctionReturn(0); 606 } 607 608 #undef __FUNCT__ 609 #define __FUNCT__ "MatMultAdd_MPIAIJ" 610 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 611 { 612 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 613 int ierr; 614 615 PetscFunctionBegin; 616 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 617 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 618 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 619 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 620 PetscFunctionReturn(0); 621 } 622 623 #undef __FUNCT__ 624 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 625 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 626 { 627 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 628 int ierr; 629 630 PetscFunctionBegin; 631 /* do nondiagonal part */ 632 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 633 /* send it on its way */ 634 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 635 /* do local part */ 636 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 637 /* receive remote parts: note this assumes the values are not actually */ 638 /* inserted in yy until the next line, which is true for my implementation*/ 639 /* but is not perhaps always true. 
*/ 640 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 641 PetscFunctionReturn(0); 642 } 643 644 EXTERN_C_BEGIN 645 #undef __FUNCT__ 646 #define __FUNCT__ "MatIsSymmetric_MPIAIJ" 647 int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f) 648 { 649 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 650 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 651 MatType type; 652 IS Me,Notme; 653 int M,N,first,last,*notme,i, ierr; 654 655 PetscFunctionBegin; 656 657 /* Easy test: symmetric diagonal block */ 658 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 659 ierr = MatIsSymmetric(Adia,Bdia,f); CHKERRQ(ierr); 660 if (!*f) PetscFunctionReturn(0); 661 662 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 663 ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr); 664 ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr); 665 ierr = PetscMalloc((N-last+first)*sizeof(int),¬me); CHKERRQ(ierr); 666 for (i=0; i<first; i++) notme[i] = i; 667 for (i=last; i<M; i++) notme[i-last+first] = i; 668 ierr = ISCreateGeneral 669 (MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr); 670 ierr = ISCreateStride 671 (MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr); 672 ierr = MatGetSubMatrices 673 (Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs); CHKERRQ(ierr); 674 Aoff = Aoffs[0]; 675 ierr = MatGetSubMatrices 676 (Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs); CHKERRQ(ierr); 677 Boff = Boffs[0]; 678 ierr = MatIsSymmetric(Aoff,Boff,f); CHKERRQ(ierr); 679 ierr = MatDestroyMatrices(1,&Aoffs); CHKERRQ(ierr); 680 ierr = MatDestroyMatrices(1,&Boffs); CHKERRQ(ierr); 681 ierr = ISDestroy(Me); CHKERRQ(ierr); 682 ierr = ISDestroy(Notme); CHKERRQ(ierr); 683 684 PetscFunctionReturn(0); 685 } 686 EXTERN_C_END 687 688 #undef __FUNCT__ 689 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 690 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 691 { 692 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 693 int ierr; 694 695 PetscFunctionBegin; 696 /* do nondiagonal part */ 697 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 698 /* send it on its way */ 699 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 700 /* do local part */ 701 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 702 /* receive remote parts: note this assumes the values are not actually */ 703 /* inserted in yy until the next line, which is true for my implementation*/ 704 /* but is not perhaps always true. 
*/ 705 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 706 PetscFunctionReturn(0); 707 } 708 709 /* 710 This only works correctly for square matrices where the subblock A->A is the 711 diagonal block 712 */ 713 #undef __FUNCT__ 714 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 715 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 716 { 717 int ierr; 718 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 719 720 PetscFunctionBegin; 721 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 722 if (a->rstart != a->cstart || a->rend != a->cend) { 723 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 724 } 725 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 726 PetscFunctionReturn(0); 727 } 728 729 #undef __FUNCT__ 730 #define __FUNCT__ "MatScale_MPIAIJ" 731 int MatScale_MPIAIJ(PetscScalar *aa,Mat A) 732 { 733 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 734 int ierr; 735 736 PetscFunctionBegin; 737 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 738 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 739 PetscFunctionReturn(0); 740 } 741 742 #undef __FUNCT__ 743 #define __FUNCT__ "MatDestroy_MPIAIJ" 744 int MatDestroy_MPIAIJ(Mat mat) 745 { 746 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 747 int ierr; 748 749 PetscFunctionBegin; 750 #if defined(PETSC_USE_LOG) 751 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 752 #endif 753 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 754 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 755 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 756 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 757 #if defined (PETSC_USE_CTABLE) 758 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 759 #else 760 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 761 #endif 762 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 763 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 764 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 765 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 766 ierr = PetscFree(aij);CHKERRQ(ierr); 767 PetscFunctionReturn(0); 768 } 769 770 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer); 771 extern int MatFactorInfo_Spooles(Mat,PetscViewer); 772 extern int MatFactorInfo_MUMPS(Mat,PetscViewer); 773 774 #undef __FUNCT__ 775 #define __FUNCT__ "MatView_MPIAIJ_Binary" 776 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 777 { 778 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 779 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 780 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 781 int nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag; 782 int nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 783 PetscScalar *column_values; 784 785 PetscFunctionBegin; 786 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 787 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 788 nz = A->nz + B->nz; 789 if (rank == 0) { 790 header[0] = MAT_FILE_COOKIE; 791 header[1] = mat->M; 792 header[2] = mat->N; 793 ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 794 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 795 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr); 796 /* get largest number of rows any processor has */ 797 rlen = mat->m; 798 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 799 for (i=1; i<size; i++) { 800 rlen = PetscMax(rlen,range[i+1] - range[i]); 801 } 802 } else { 
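/*
   Note (layout reconstructed from this routine; treat it as a sketch rather than a
   format specification): MatView_MPIAIJ_Binary() funnels everything through processor 0
   and writes, in order,

       int         header[4];        // {MAT_FILE_COOKIE, M, N, total nz summed over ranks}
       int         row_lengths[M];   // nonzeros per global row
       int         col_indices[nz];  // global columns, row by row, increasing within a row
       PetscScalar values[nz];       // matching numerical values

   Each remote processor MPI_Send()s its row lengths, column indices and values; rank 0
   receives them in rank order and streams them out with PetscBinaryWrite(), so no
   processor ever needs storage for the whole matrix.
*/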
ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs as much buffer space as the processor with the most nonzeros */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
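/*
   Note (worked example, illustrative values only): the two nearly identical loops above
   splice, for each local row, the compressed off-diagonal block B and the diagonal
   block A back into one run of entries in increasing global column order (assuming the
   usual sorted storage of A->j and garray).  For instance, with cstart = 10, diagonal
   columns 10..14 and garray = {2,7,21}:

       B-part below cstart  ->  global columns 2, 7
       A-part               ->  global columns 10 + (local column)
       B-part above cstart  ->  global column 21

   which is why cnt must finish exactly equal to A->nz + B->nz.
*/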
891 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 892 } 893 ierr = PetscFree(column_values);CHKERRQ(ierr); 894 PetscFunctionReturn(0); 895 } 896 897 #undef __FUNCT__ 898 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 899 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 900 { 901 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 902 int ierr,rank = aij->rank,size = aij->size; 903 PetscTruth isdraw,isascii,flg,isbinary; 904 PetscViewer sviewer; 905 PetscViewerFormat format; 906 907 PetscFunctionBegin; 908 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 909 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 910 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 911 if (isascii) { 912 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 913 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 914 MatInfo info; 915 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 916 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 917 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 918 if (flg) { 919 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 920 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 921 } else { 922 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 923 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 924 } 925 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 926 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 927 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 928 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 929 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 930 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 931 PetscFunctionReturn(0); 932 } else if (format == PETSC_VIEWER_ASCII_INFO) { 933 PetscFunctionReturn(0); 934 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 935 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE) 936 ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr); 937 #endif 938 #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE) 939 ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr); 940 #endif 941 #if defined(PETSC_HAVE_MUMPS) && !defined(PETSC_USE_SINGLE) 942 ierr = MatFactorInfo_MUMPS(mat,viewer);CHKERRQ(ierr); 943 #endif 944 PetscFunctionReturn(0); 945 } 946 } else if (isbinary) { 947 if (size == 1) { 948 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 949 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 950 } else { 951 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 952 } 953 PetscFunctionReturn(0); 954 } else if (isdraw) { 955 PetscDraw draw; 956 PetscTruth isnull; 957 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 958 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 959 } 960 961 if (size == 1) { 962 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 963 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 964 } else { 965 /* assemble the entire matrix onto first processor. 
*/ 966 Mat A; 967 Mat_SeqAIJ *Aloc; 968 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 969 PetscScalar *a; 970 971 if (!rank) { 972 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 973 } else { 974 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 975 } 976 PetscLogObjectParent(mat,A); 977 978 /* copy over the A part */ 979 Aloc = (Mat_SeqAIJ*)aij->A->data; 980 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 981 row = aij->rstart; 982 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 983 for (i=0; i<m; i++) { 984 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 985 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 986 } 987 aj = Aloc->j; 988 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 989 990 /* copy over the B part */ 991 Aloc = (Mat_SeqAIJ*)aij->B->data; 992 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 993 row = aij->rstart; 994 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 995 ct = cols; 996 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 997 for (i=0; i<m; i++) { 998 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 999 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1000 } 1001 ierr = PetscFree(ct);CHKERRQ(ierr); 1002 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1003 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1004 /* 1005 Everyone has to call to draw the matrix since the graphics waits are 1006 synchronized across all processors that share the PetscDraw object 1007 */ 1008 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1009 if (!rank) { 1010 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1011 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1012 } 1013 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1014 ierr = MatDestroy(A);CHKERRQ(ierr); 1015 } 1016 PetscFunctionReturn(0); 1017 } 1018 1019 #undef __FUNCT__ 1020 #define __FUNCT__ "MatView_MPIAIJ" 1021 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1022 { 1023 int ierr; 1024 PetscTruth isascii,isdraw,issocket,isbinary; 1025 1026 PetscFunctionBegin; 1027 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 1028 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1029 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1030 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1031 if (isascii || isdraw || isbinary || issocket) { 1032 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1033 } else { 1034 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1035 } 1036 PetscFunctionReturn(0); 1037 } 1038 1039 1040 1041 #undef __FUNCT__ 1042 #define __FUNCT__ "MatRelax_MPIAIJ" 1043 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1044 { 1045 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1046 int ierr; 1047 Vec bb1; 1048 PetscScalar mone=-1.0; 1049 1050 PetscFunctionBegin; 1051 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1052 1053 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1054 1055 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1056 if (flag & 
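/*
   Note (sketch of the structure below, not additional code): each "local" SOR variant
   first brings in the off-processor part of x, folds it into the right-hand side, and
   only then runs the sequential relaxation on the diagonal block.  One outer iteration
   does roughly

       x_ghost = scatter(x);                    // VecScatterBegin/End into mat->lvec
       bb1     = bb - B * x_ghost;              // VecScale(-1,lvec) + B->ops->multadd
       x       = SOR_sweep(A,bb1,omega,...,x);  // mat->A->ops->relax, lits local iterations

   so the coupling through B enters only via the modified right-hand side; a true
   parallel SOR is reported as unsupported at the end of the routine.
*/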
SOR_ZERO_INITIAL_GUESS) { 1057 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1058 its--; 1059 } 1060 1061 while (its--) { 1062 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1063 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1064 1065 /* update rhs: bb1 = bb - B*x */ 1066 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1067 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1068 1069 /* local sweep */ 1070 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1071 CHKERRQ(ierr); 1072 } 1073 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1074 if (flag & SOR_ZERO_INITIAL_GUESS) { 1075 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1076 its--; 1077 } 1078 while (its--) { 1079 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1080 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1081 1082 /* update rhs: bb1 = bb - B*x */ 1083 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1084 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1085 1086 /* local sweep */ 1087 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1088 CHKERRQ(ierr); 1089 } 1090 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1091 if (flag & SOR_ZERO_INITIAL_GUESS) { 1092 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1093 its--; 1094 } 1095 while (its--) { 1096 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1097 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1098 1099 /* update rhs: bb1 = bb - B*x */ 1100 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1101 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1102 1103 /* local sweep */ 1104 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1105 CHKERRQ(ierr); 1106 } 1107 } else { 1108 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1109 } 1110 1111 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1112 PetscFunctionReturn(0); 1113 } 1114 1115 #undef __FUNCT__ 1116 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1117 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1118 { 1119 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1120 Mat A = mat->A,B = mat->B; 1121 int ierr; 1122 PetscReal isend[5],irecv[5]; 1123 1124 PetscFunctionBegin; 1125 info->block_size = 1.0; 1126 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1127 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1128 isend[3] = info->memory; isend[4] = info->mallocs; 1129 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1130 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1131 isend[3] += info->memory; isend[4] += info->mallocs; 1132 if (flag == MAT_LOCAL) { 1133 info->nz_used = isend[0]; 1134 info->nz_allocated = isend[1]; 1135 info->nz_unneeded = isend[2]; 1136 info->memory = isend[3]; 1137 info->mallocs = isend[4]; 1138 } else if (flag == MAT_GLOBAL_MAX) { 1139 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1140 info->nz_used = irecv[0]; 1141 info->nz_allocated = irecv[1]; 1142 info->nz_unneeded = irecv[2]; 1143 info->memory = irecv[3]; 1144 info->mallocs = 
irecv[4]; 1145 } else if (flag == MAT_GLOBAL_SUM) { 1146 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1147 info->nz_used = irecv[0]; 1148 info->nz_allocated = irecv[1]; 1149 info->nz_unneeded = irecv[2]; 1150 info->memory = irecv[3]; 1151 info->mallocs = irecv[4]; 1152 } 1153 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1154 info->fill_ratio_needed = 0; 1155 info->factor_mallocs = 0; 1156 info->rows_global = (double)matin->M; 1157 info->columns_global = (double)matin->N; 1158 info->rows_local = (double)matin->m; 1159 info->columns_local = (double)matin->N; 1160 1161 PetscFunctionReturn(0); 1162 } 1163 1164 #undef __FUNCT__ 1165 #define __FUNCT__ "MatSetOption_MPIAIJ" 1166 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1167 { 1168 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1169 int ierr; 1170 1171 PetscFunctionBegin; 1172 switch (op) { 1173 case MAT_NO_NEW_NONZERO_LOCATIONS: 1174 case MAT_YES_NEW_NONZERO_LOCATIONS: 1175 case MAT_COLUMNS_UNSORTED: 1176 case MAT_COLUMNS_SORTED: 1177 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1178 case MAT_KEEP_ZEROED_ROWS: 1179 case MAT_NEW_NONZERO_LOCATION_ERR: 1180 case MAT_USE_INODES: 1181 case MAT_DO_NOT_USE_INODES: 1182 case MAT_IGNORE_ZERO_ENTRIES: 1183 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1184 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1185 break; 1186 case MAT_ROW_ORIENTED: 1187 a->roworiented = PETSC_TRUE; 1188 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1189 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1190 break; 1191 case MAT_ROWS_SORTED: 1192 case MAT_ROWS_UNSORTED: 1193 case MAT_YES_NEW_DIAGONALS: 1194 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1195 break; 1196 case MAT_COLUMN_ORIENTED: 1197 a->roworiented = PETSC_FALSE; 1198 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1199 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1200 break; 1201 case MAT_IGNORE_OFF_PROC_ENTRIES: 1202 a->donotstash = PETSC_TRUE; 1203 break; 1204 case MAT_NO_NEW_DIAGONALS: 1205 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1206 default: 1207 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1208 } 1209 PetscFunctionReturn(0); 1210 } 1211 1212 #undef __FUNCT__ 1213 #define __FUNCT__ "MatGetRow_MPIAIJ" 1214 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1215 { 1216 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1217 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1218 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1219 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1220 int *cmap,*idx_p; 1221 1222 PetscFunctionBegin; 1223 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1224 mat->getrowactive = PETSC_TRUE; 1225 1226 if (!mat->rowvalues && (idx || v)) { 1227 /* 1228 allocate enough space to hold information from the longest row. 
1229 */ 1230 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1231 int max = 1,tmp; 1232 for (i=0; i<matin->m; i++) { 1233 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1234 if (max < tmp) { max = tmp; } 1235 } 1236 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1237 mat->rowindices = (int*)(mat->rowvalues + max); 1238 } 1239 1240 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1241 lrow = row - rstart; 1242 1243 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1244 if (!v) {pvA = 0; pvB = 0;} 1245 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1246 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1247 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1248 nztot = nzA + nzB; 1249 1250 cmap = mat->garray; 1251 if (v || idx) { 1252 if (nztot) { 1253 /* Sort by increasing column numbers, assuming A and B already sorted */ 1254 int imark = -1; 1255 if (v) { 1256 *v = v_p = mat->rowvalues; 1257 for (i=0; i<nzB; i++) { 1258 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1259 else break; 1260 } 1261 imark = i; 1262 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1263 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1264 } 1265 if (idx) { 1266 *idx = idx_p = mat->rowindices; 1267 if (imark > -1) { 1268 for (i=0; i<imark; i++) { 1269 idx_p[i] = cmap[cworkB[i]]; 1270 } 1271 } else { 1272 for (i=0; i<nzB; i++) { 1273 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1274 else break; 1275 } 1276 imark = i; 1277 } 1278 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1279 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1280 } 1281 } else { 1282 if (idx) *idx = 0; 1283 if (v) *v = 0; 1284 } 1285 } 1286 *nz = nztot; 1287 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1288 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1289 PetscFunctionReturn(0); 1290 } 1291 1292 #undef __FUNCT__ 1293 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1294 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1295 { 1296 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1297 1298 PetscFunctionBegin; 1299 if (aij->getrowactive == PETSC_FALSE) { 1300 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1301 } 1302 aij->getrowactive = PETSC_FALSE; 1303 PetscFunctionReturn(0); 1304 } 1305 1306 #undef __FUNCT__ 1307 #define __FUNCT__ "MatNorm_MPIAIJ" 1308 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1309 { 1310 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1311 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1312 int ierr,i,j,cstart = aij->cstart; 1313 PetscReal sum = 0.0; 1314 PetscScalar *v; 1315 1316 PetscFunctionBegin; 1317 if (aij->size == 1) { 1318 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1319 } else { 1320 if (type == NORM_FROBENIUS) { 1321 v = amat->a; 1322 for (i=0; i<amat->nz; i++) { 1323 #if defined(PETSC_USE_COMPLEX) 1324 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1325 #else 1326 sum += (*v)*(*v); v++; 1327 #endif 1328 } 1329 v = bmat->a; 1330 for (i=0; i<bmat->nz; i++) { 1331 #if defined(PETSC_USE_COMPLEX) 1332 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1333 #else 1334 sum += (*v)*(*v); v++; 1335 #endif 1336 } 1337 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1338 *norm = sqrt(*norm); 1339 } else if (type == NORM_1) { /* max column norm */ 1340 PetscReal *tmp,*tmp2; 1341 int 
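/*
   Note (hedged usage sketch; "grow" is a placeholder name): MatGetRow_MPIAIJ() above
   hands back one contiguous row in global column order by splicing three pieces,

       idx = [ garray[B cols] < cstart | cstart + A cols | remaining B cols (>= cend) ]

   with imark recording where the lower B-part stops.  Typical read-only use, for a
   locally owned global row grow:

       int nz,*cols; PetscScalar *vals;
       ierr = MatGetRow(mat,grow,&nz,&cols,&vals);CHKERRQ(ierr);
       ...
       ierr = MatRestoreRow(mat,grow,&nz,&cols,&vals);CHKERRQ(ierr);

   The returned buffers live in mat->rowvalues/rowindices, sized for the longest local
   row, so only one row may be held at a time (hence the getrowactive guard).
*/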
*jj,*garray = aij->garray; 1342 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1343 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1344 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1345 *norm = 0.0; 1346 v = amat->a; jj = amat->j; 1347 for (j=0; j<amat->nz; j++) { 1348 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1349 } 1350 v = bmat->a; jj = bmat->j; 1351 for (j=0; j<bmat->nz; j++) { 1352 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1353 } 1354 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1355 for (j=0; j<mat->N; j++) { 1356 if (tmp2[j] > *norm) *norm = tmp2[j]; 1357 } 1358 ierr = PetscFree(tmp);CHKERRQ(ierr); 1359 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1360 } else if (type == NORM_INFINITY) { /* max row norm */ 1361 PetscReal ntemp = 0.0; 1362 for (j=0; j<aij->A->m; j++) { 1363 v = amat->a + amat->i[j]; 1364 sum = 0.0; 1365 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1366 sum += PetscAbsScalar(*v); v++; 1367 } 1368 v = bmat->a + bmat->i[j]; 1369 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1370 sum += PetscAbsScalar(*v); v++; 1371 } 1372 if (sum > ntemp) ntemp = sum; 1373 } 1374 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1375 } else { 1376 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1377 } 1378 } 1379 PetscFunctionReturn(0); 1380 } 1381 1382 #undef __FUNCT__ 1383 #define __FUNCT__ "MatTranspose_MPIAIJ" 1384 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1385 { 1386 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1387 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1388 int ierr; 1389 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1390 Mat B; 1391 PetscScalar *array; 1392 1393 PetscFunctionBegin; 1394 if (!matout && M != N) { 1395 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1396 } 1397 1398 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1399 1400 /* copy over the A part */ 1401 Aloc = (Mat_SeqAIJ*)a->A->data; 1402 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1403 row = a->rstart; 1404 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1405 for (i=0; i<m; i++) { 1406 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1407 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1408 } 1409 aj = Aloc->j; 1410 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1411 1412 /* copy over the B part */ 1413 Aloc = (Mat_SeqAIJ*)a->B->data; 1414 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1415 row = a->rstart; 1416 ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr); 1417 ct = cols; 1418 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1419 for (i=0; i<m; i++) { 1420 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1421 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1422 } 1423 ierr = PetscFree(ct);CHKERRQ(ierr); 1424 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1425 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1426 if (matout) { 1427 *matout = B; 1428 } else { 1429 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1430 } 1431 PetscFunctionReturn(0); 1432 } 1433 1434 #undef __FUNCT__ 1435 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1436 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1437 { 1438 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1439 Mat a = aij->A,b = aij->B; 1440 int ierr,s1,s2,s3; 1441 1442 PetscFunctionBegin; 1443 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1444 if 
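/*
   Note (formulas implied by the code above, written out for reference): each norm
   reduces local partial results with a single MPI_Allreduce,

       NORM_FROBENIUS:  ||A||_F   = sqrt( global sum of |a_ij|^2 )            (MPI_SUM)
       NORM_1:          ||A||_1   = max_j sum_i |a_ij|   (per-column sums summed, then max)
       NORM_INFINITY:   ||A||_inf = max_i sum_j |a_ij|   (per-row sums, then MPI_MAX)

   The NORM_1 branch allocates two work arrays of length mat->N on every processor, so
   like the non-CTABLE colmap it is simple but not memory-scalable.
*/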
(rr) { 1445 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1446 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1447 /* Overlap communication with computation. */ 1448 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1449 } 1450 if (ll) { 1451 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1452 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1453 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1454 } 1455 /* scale the diagonal block */ 1456 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1457 1458 if (rr) { 1459 /* Do a scatter end and then right scale the off-diagonal block */ 1460 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1461 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1462 } 1463 1464 PetscFunctionReturn(0); 1465 } 1466 1467 1468 #undef __FUNCT__ 1469 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1470 int MatPrintHelp_MPIAIJ(Mat A) 1471 { 1472 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1473 int ierr; 1474 1475 PetscFunctionBegin; 1476 if (!a->rank) { 1477 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1478 } 1479 PetscFunctionReturn(0); 1480 } 1481 1482 #undef __FUNCT__ 1483 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1484 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1485 { 1486 PetscFunctionBegin; 1487 *bs = 1; 1488 PetscFunctionReturn(0); 1489 } 1490 #undef __FUNCT__ 1491 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1492 int MatSetUnfactored_MPIAIJ(Mat A) 1493 { 1494 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1495 int ierr; 1496 1497 PetscFunctionBegin; 1498 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1499 PetscFunctionReturn(0); 1500 } 1501 1502 #undef __FUNCT__ 1503 #define __FUNCT__ "MatEqual_MPIAIJ" 1504 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1505 { 1506 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1507 Mat a,b,c,d; 1508 PetscTruth flg; 1509 int ierr; 1510 1511 PetscFunctionBegin; 1512 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1513 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1514 a = matA->A; b = matA->B; 1515 c = matB->A; d = matB->B; 1516 1517 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1518 if (flg == PETSC_TRUE) { 1519 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1520 } 1521 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1522 PetscFunctionReturn(0); 1523 } 1524 1525 #undef __FUNCT__ 1526 #define __FUNCT__ "MatCopy_MPIAIJ" 1527 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1528 { 1529 int ierr; 1530 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1531 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1532 PetscTruth flg; 1533 1534 PetscFunctionBegin; 1535 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1536 if (str != SAME_NONZERO_PATTERN || !flg) { 1537 /* because of the column compression in the off-processor part of the matrix a->B, 1538 the number of columns in a->B and b->B may be different, hence we cannot call 1539 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1540 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1541 then copying the submatrices */ 1542 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1543 } else { 1544 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1545 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1546 } 1547 PetscFunctionReturn(0); 1548 } 1549 1550 #undef __FUNCT__ 1551 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1552 int MatSetUpPreallocation_MPIAIJ(Mat A) 1553 { 1554 int ierr; 1555 1556 PetscFunctionBegin; 1557 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1558 PetscFunctionReturn(0); 1559 } 1560 1561 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1562 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1563 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1564 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **); 1565 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1566 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1567 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*); 1568 #endif 1569 1570 #include "petscblaslapack.h" 1571 extern int MatAXPY_SeqAIJ(PetscScalar *,Mat,Mat,MatStructure); 1572 #undef __FUNCT__ 1573 #define __FUNCT__ "MatAXPY_MPIAIJ" 1574 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1575 { 1576 int ierr,one=1,i; 1577 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1578 Mat_SeqAIJ *x,*y; 1579 1580 PetscFunctionBegin; 1581 if (str == SAME_NONZERO_PATTERN) { 1582 x = (Mat_SeqAIJ *)xx->A->data; 1583 y = (Mat_SeqAIJ *)yy->A->data; 1584 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1585 x = (Mat_SeqAIJ *)xx->B->data; 1586 y = (Mat_SeqAIJ *)yy->B->data; 1587 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1588 } else if (str == SUBSET_NONZERO_PATTERN) { 1589 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1590 1591 x = (Mat_SeqAIJ *)xx->B->data; 1592 y = (Mat_SeqAIJ *)yy->B->data; 1593 if (y->xtoy && y->XtoY != xx->B) { 1594 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1595 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1596 } 1597 if (!y->xtoy) { /* get xtoy */ 1598 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1599 y->XtoY = xx->B; 1600 } 1601 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1602 } else { 1603 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1604 } 1605 PetscFunctionReturn(0); 1606 } 1607 1608 /* -------------------------------------------------------------------*/ 1609 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1610 MatGetRow_MPIAIJ, 1611 MatRestoreRow_MPIAIJ, 1612 MatMult_MPIAIJ, 1613 MatMultAdd_MPIAIJ, 1614 MatMultTranspose_MPIAIJ, 1615 MatMultTransposeAdd_MPIAIJ, 1616 0, 1617 0, 1618 0, 1619 0, 1620 0, 1621 0, 1622 MatRelax_MPIAIJ, 1623 MatTranspose_MPIAIJ, 1624 MatGetInfo_MPIAIJ, 1625 MatEqual_MPIAIJ, 1626 MatGetDiagonal_MPIAIJ, 1627 MatDiagonalScale_MPIAIJ, 1628 MatNorm_MPIAIJ, 1629 MatAssemblyBegin_MPIAIJ, 1630 MatAssemblyEnd_MPIAIJ, 1631 0, 1632 MatSetOption_MPIAIJ, 1633 MatZeroEntries_MPIAIJ, 1634 MatZeroRows_MPIAIJ, 1635 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1636 MatLUFactorSymbolic_MPIAIJ_TFS, 1637 #else 1638 0, 1639 #endif 1640 0, 1641 0, 1642 0, 1643 MatSetUpPreallocation_MPIAIJ, 1644 0, 1645 0, 1646 0, 1647 0, 1648 MatDuplicate_MPIAIJ, 1649 0, 1650 0, 1651 0, 1652 0, 1653 MatAXPY_MPIAIJ, 1654 MatGetSubMatrices_MPIAIJ, 1655 
MatIncreaseOverlap_MPIAIJ, 1656 MatGetValues_MPIAIJ, 1657 MatCopy_MPIAIJ, 1658 MatPrintHelp_MPIAIJ, 1659 MatScale_MPIAIJ, 1660 0, 1661 0, 1662 0, 1663 MatGetBlockSize_MPIAIJ, 1664 0, 1665 0, 1666 0, 1667 0, 1668 MatFDColoringCreate_MPIAIJ, 1669 0, 1670 MatSetUnfactored_MPIAIJ, 1671 0, 1672 0, 1673 MatGetSubMatrix_MPIAIJ, 1674 MatDestroy_MPIAIJ, 1675 MatView_MPIAIJ, 1676 MatGetPetscMaps_Petsc, 1677 0, 1678 0, 1679 0, 1680 0, 1681 0, 1682 0, 1683 0, 1684 0, 1685 MatSetColoring_MPIAIJ, 1686 MatSetValuesAdic_MPIAIJ, 1687 MatSetValuesAdifor_MPIAIJ 1688 }; 1689 1690 /* ----------------------------------------------------------------------------------------*/ 1691 1692 EXTERN_C_BEGIN 1693 #undef __FUNCT__ 1694 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1695 int MatStoreValues_MPIAIJ(Mat mat) 1696 { 1697 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1698 int ierr; 1699 1700 PetscFunctionBegin; 1701 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1702 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1703 PetscFunctionReturn(0); 1704 } 1705 EXTERN_C_END 1706 1707 EXTERN_C_BEGIN 1708 #undef __FUNCT__ 1709 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1710 int MatRetrieveValues_MPIAIJ(Mat mat) 1711 { 1712 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1713 int ierr; 1714 1715 PetscFunctionBegin; 1716 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1717 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1718 PetscFunctionReturn(0); 1719 } 1720 EXTERN_C_END 1721 1722 #include "petscpc.h" 1723 EXTERN_C_BEGIN 1724 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1725 EXTERN_C_END 1726 1727 EXTERN_C_BEGIN 1728 #undef __FUNCT__ 1729 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1730 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1731 { 1732 Mat_MPIAIJ *b; 1733 int ierr,i; 1734 PetscTruth flg2; 1735 1736 PetscFunctionBegin; 1737 B->preallocated = PETSC_TRUE; 1738 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1739 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1740 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1741 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1742 if (d_nnz) { 1743 for (i=0; i<B->m; i++) { 1744 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 1745 } 1746 } 1747 if (o_nnz) { 1748 for (i=0; i<B->m; i++) { 1749 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 1750 } 1751 } 1752 b = (Mat_MPIAIJ*)B->data; 1753 1754 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1755 PetscLogObjectParent(B,b->A); 1756 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1757 PetscLogObjectParent(B,b->B); 1758 1759 PetscFunctionReturn(0); 1760 } 1761 EXTERN_C_END 1762 1763 EXTERN_C_BEGIN 1764 #undef __FUNCT__ 1765 #define __FUNCT__ "MatCreate_MPIAIJ" 1766 int MatCreate_MPIAIJ(Mat B) 1767 { 1768 Mat_MPIAIJ *b; 1769 int ierr,i,size; 1770 1771 PetscFunctionBegin; 1772 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1773 1774 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1775 B->data = (void*)b; 1776 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1777 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1778 B->factor = 0; 1779 B->assembled = PETSC_FALSE; 1780 B->mapping = 0; 1781 1782 B->insertmode = 
NOT_SET_VALUES; 1783 b->size = size; 1784 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1785 1786 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1787 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1788 1789 /* the information in the maps duplicates the information computed below, eventually 1790 we should remove the duplicate information that is not contained in the maps */ 1791 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1792 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1793 1794 /* build local table of row and column ownerships */ 1795 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1796 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1797 b->cowners = b->rowners + b->size + 2; 1798 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1799 b->rowners[0] = 0; 1800 for (i=2; i<=b->size; i++) { 1801 b->rowners[i] += b->rowners[i-1]; 1802 } 1803 b->rstart = b->rowners[b->rank]; 1804 b->rend = b->rowners[b->rank+1]; 1805 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1806 b->cowners[0] = 0; 1807 for (i=2; i<=b->size; i++) { 1808 b->cowners[i] += b->cowners[i-1]; 1809 } 1810 b->cstart = b->cowners[b->rank]; 1811 b->cend = b->cowners[b->rank+1]; 1812 1813 /* build cache for off array entries formed */ 1814 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1815 b->donotstash = PETSC_FALSE; 1816 b->colmap = 0; 1817 b->garray = 0; 1818 b->roworiented = PETSC_TRUE; 1819 1820 /* stuff used for matrix vector multiply */ 1821 b->lvec = PETSC_NULL; 1822 b->Mvctx = PETSC_NULL; 1823 1824 /* stuff for MatGetRow() */ 1825 b->rowindices = 0; 1826 b->rowvalues = 0; 1827 b->getrowactive = PETSC_FALSE; 1828 1829 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1830 "MatStoreValues_MPIAIJ", 1831 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1832 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1833 "MatRetrieveValues_MPIAIJ", 1834 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1835 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1836 "MatGetDiagonalBlock_MPIAIJ", 1837 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1838 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C", 1839 "MatIsSymmetric_MPIAIJ", 1840 MatIsSymmetric_MPIAIJ); CHKERRQ(ierr); 1841 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 1842 "MatMPIAIJSetPreallocation_MPIAIJ", 1843 MatMPIAIJSetPreallocation_MPIAIJ); CHKERRQ(ierr); 1844 PetscFunctionReturn(0); 1845 } 1846 EXTERN_C_END 1847 1848 #undef __FUNCT__ 1849 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1850 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1851 { 1852 Mat mat; 1853 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1854 int ierr; 1855 1856 PetscFunctionBegin; 1857 *newmat = 0; 1858 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1859 ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1860 a = (Mat_MPIAIJ*)mat->data; 1861 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1862 mat->factor = matin->factor; 1863 mat->assembled = PETSC_TRUE; 1864 mat->insertmode = NOT_SET_VALUES; 1865 mat->preallocated = PETSC_TRUE; 1866 1867 a->rstart = oldmat->rstart; 1868 a->rend = oldmat->rend; 1869 a->cstart = oldmat->cstart; 1870 a->cend = 
oldmat->cend; 1871 a->size = oldmat->size; 1872 a->rank = oldmat->rank; 1873 a->donotstash = oldmat->donotstash; 1874 a->roworiented = oldmat->roworiented; 1875 a->rowindices = 0; 1876 a->rowvalues = 0; 1877 a->getrowactive = PETSC_FALSE; 1878 1879 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1880 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1881 if (oldmat->colmap) { 1882 #if defined (PETSC_USE_CTABLE) 1883 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1884 #else 1885 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1886 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1887 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1888 #endif 1889 } else a->colmap = 0; 1890 if (oldmat->garray) { 1891 int len; 1892 len = oldmat->B->n; 1893 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1894 PetscLogObjectMemory(mat,len*sizeof(int)); 1895 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1896 } else a->garray = 0; 1897 1898 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1899 PetscLogObjectParent(mat,a->lvec); 1900 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1901 PetscLogObjectParent(mat,a->Mvctx); 1902 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1903 PetscLogObjectParent(mat,a->A); 1904 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1905 PetscLogObjectParent(mat,a->B); 1906 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1907 *newmat = mat; 1908 PetscFunctionReturn(0); 1909 } 1910 1911 #include "petscsys.h" 1912 1913 EXTERN_C_BEGIN 1914 #undef __FUNCT__ 1915 #define __FUNCT__ "MatLoad_MPIAIJ" 1916 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat) 1917 { 1918 Mat A; 1919 PetscScalar *vals,*svals; 1920 MPI_Comm comm = ((PetscObject)viewer)->comm; 1921 MPI_Status status; 1922 int i,nz,ierr,j,rstart,rend,fd; 1923 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1924 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1925 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1926 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_MUMPS) 1927 PetscTruth flag; 1928 #endif 1929 1930 PetscFunctionBegin; 1931 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1932 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1933 if (!rank) { 1934 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1935 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1936 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1937 if (header[3] < 0) { 1938 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1939 } 1940 } 1941 1942 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1943 M = header[1]; N = header[2]; 1944 /* determine ownership of all rows */ 1945 m = M/size + ((M % size) > rank); 1946 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1947 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1948 rowners[0] = 0; 1949 for (i=2; i<=size; i++) { 1950 rowners[i] += rowners[i-1]; 1951 } 1952 rstart = rowners[rank]; 1953 rend = rowners[rank+1]; 1954 1955 /* distribute row lengths to all processors */ 1956 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1957 offlens = ourlens + (rend-rstart); 
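/* ourlens[] and offlens[] share one allocation: ourlens[i] first receives the total number of nonzeros in local row i (scattered from process 0 below), and offlens[i] is later filled with the count of entries falling outside the local column range, giving the diagonal/off-diagonal preallocation passed to MatCreateMPIAIJ() */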
1958 if (!rank) { 1959 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1960 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1961 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1962 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1963 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1964 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1965 } else { 1966 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1967 } 1968 1969 if (!rank) { 1970 /* calculate the number of nonzeros on each processor */ 1971 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1972 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1973 for (i=0; i<size; i++) { 1974 for (j=rowners[i]; j< rowners[i+1]; j++) { 1975 procsnz[i] += rowlengths[j]; 1976 } 1977 } 1978 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1979 1980 /* determine max buffer needed and allocate it */ 1981 maxnz = 0; 1982 for (i=0; i<size; i++) { 1983 maxnz = PetscMax(maxnz,procsnz[i]); 1984 } 1985 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1986 1987 /* read in my part of the matrix column indices */ 1988 nz = procsnz[0]; 1989 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1990 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1991 1992 /* read in every one elses and ship off */ 1993 for (i=1; i<size; i++) { 1994 nz = procsnz[i]; 1995 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1996 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1997 } 1998 ierr = PetscFree(cols);CHKERRQ(ierr); 1999 } else { 2000 /* determine buffer space needed for message */ 2001 nz = 0; 2002 for (i=0; i<m; i++) { 2003 nz += ourlens[i]; 2004 } 2005 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 2006 2007 /* receive message of column indices*/ 2008 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2009 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2010 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2011 } 2012 2013 /* determine column ownership if matrix is not square */ 2014 if (N != M) { 2015 n = N/size + ((N % size) > rank); 2016 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2017 cstart = cend - n; 2018 } else { 2019 cstart = rstart; 2020 cend = rend; 2021 n = cend - cstart; 2022 } 2023 2024 /* loop over local rows, determining number of off diagonal entries */ 2025 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 2026 jj = 0; 2027 for (i=0; i<m; i++) { 2028 for (j=0; j<ourlens[i]; j++) { 2029 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2030 jj++; 2031 } 2032 } 2033 2034 /* create our matrix */ 2035 for (i=0; i<m; i++) { 2036 ourlens[i] -= offlens[i]; 2037 } 2038 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 2039 A = *newmat; 2040 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2041 for (i=0; i<m; i++) { 2042 ourlens[i] += offlens[i]; 2043 } 2044 2045 if (!rank) { 2046 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2047 2048 /* read in my part of the matrix numerical values */ 2049 nz = procsnz[0]; 2050 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2051 2052 /* insert into matrix */ 2053 jj = rstart; 2054 smycols = mycols; 2055 svals = vals; 2056 for (i=0; i<m; i++) { 2057 ierr = 
MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2058 smycols += ourlens[i]; 2059 svals += ourlens[i]; 2060 jj++; 2061 } 2062 2063 /* read in other processors and ship out */ 2064 for (i=1; i<size; i++) { 2065 nz = procsnz[i]; 2066 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2067 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2068 } 2069 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2070 } else { 2071 /* receive numeric values */ 2072 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2073 2074 /* receive message of values*/ 2075 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2076 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2077 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2078 2079 /* insert into matrix */ 2080 jj = rstart; 2081 smycols = mycols; 2082 svals = vals; 2083 for (i=0; i<m; i++) { 2084 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2085 smycols += ourlens[i]; 2086 svals += ourlens[i]; 2087 jj++; 2088 } 2089 } 2090 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2091 ierr = PetscFree(vals);CHKERRQ(ierr); 2092 ierr = PetscFree(mycols);CHKERRQ(ierr); 2093 ierr = PetscFree(rowners);CHKERRQ(ierr); 2094 2095 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2096 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2097 #if defined(PETSC_HAVE_SPOOLES) 2098 ierr = PetscOptionsHasName(A->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr); 2099 if (flag) { 2100 if (size == 1) { 2101 ierr = MatUseSpooles_SeqAIJ(A);CHKERRQ(ierr); 2102 } else { 2103 ierr = MatUseSpooles_MPIAIJ(A);CHKERRQ(ierr); 2104 } 2105 } 2106 #endif 2107 #if defined(PETSC_HAVE_SUPERLUDIST) 2108 ierr = PetscOptionsHasName(A->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr); 2109 if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr); } 2110 #endif 2111 #if defined(PETSC_HAVE_MUMPS) 2112 ierr = PetscOptionsHasName(A->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr); 2113 if (flag) { ierr = MatUseMUMPS_MPIAIJ(A);CHKERRQ(ierr); } 2114 #endif 2115 PetscFunctionReturn(0); 2116 } 2117 EXTERN_C_END 2118 2119 #undef __FUNCT__ 2120 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2121 /* 2122 Not great since it makes two copies of the submatrix, first an SeqAIJ 2123 in local and then by concatenating the local matrices the end result. 
2124 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2125 */ 2126 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2127 { 2128 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2129 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2130 Mat *local,M,Mreuse; 2131 PetscScalar *vwork,*aa; 2132 MPI_Comm comm = mat->comm; 2133 Mat_SeqAIJ *aij; 2134 2135 2136 PetscFunctionBegin; 2137 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2138 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2139 2140 if (call == MAT_REUSE_MATRIX) { 2141 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2142 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 2143 local = &Mreuse; 2144 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2145 } else { 2146 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2147 Mreuse = *local; 2148 ierr = PetscFree(local);CHKERRQ(ierr); 2149 } 2150 2151 /* 2152 m - number of local rows 2153 n - number of columns (same on all processors) 2154 rstart - first row in new global matrix generated 2155 */ 2156 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2157 if (call == MAT_INITIAL_MATRIX) { 2158 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2159 ii = aij->i; 2160 jj = aij->j; 2161 2162 /* 2163 Determine the number of non-zeros in the diagonal and off-diagonal 2164 portions of the matrix in order to do correct preallocation 2165 */ 2166 2167 /* first get start and end of "diagonal" columns */ 2168 if (csize == PETSC_DECIDE) { 2169 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2170 if (mglobal == n) { /* square matrix */ 2171 nlocal = m; 2172 } else { 2173 nlocal = n/size + ((n % size) > rank); 2174 } 2175 } else { 2176 nlocal = csize; 2177 } 2178 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2179 rstart = rend - nlocal; 2180 if (rank == size - 1 && rend != n) { 2181 SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n); 2182 } 2183 2184 /* next, compute all the lengths */ 2185 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2186 olens = dlens + m; 2187 for (i=0; i<m; i++) { 2188 jend = ii[i+1] - ii[i]; 2189 olen = 0; 2190 dlen = 0; 2191 for (j=0; j<jend; j++) { 2192 if (*jj < rstart || *jj >= rend) olen++; 2193 else dlen++; 2194 jj++; 2195 } 2196 olens[i] = olen; 2197 dlens[i] = dlen; 2198 } 2199 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2200 ierr = PetscFree(dlens);CHKERRQ(ierr); 2201 } else { 2202 int ml,nl; 2203 2204 M = *newmat; 2205 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2206 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2207 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2208 /* 2209 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2210 rather than the slower MatSetValues(). 
2211 */ 2212 M->was_assembled = PETSC_TRUE; 2213 M->assembled = PETSC_FALSE; 2214 } 2215 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2216 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2217 ii = aij->i; 2218 jj = aij->j; 2219 aa = aij->a; 2220 for (i=0; i<m; i++) { 2221 row = rstart + i; 2222 nz = ii[i+1] - ii[i]; 2223 cwork = jj; jj += nz; 2224 vwork = aa; aa += nz; 2225 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2226 } 2227 2228 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2229 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2230 *newmat = M; 2231 2232 /* save submatrix used in processor for next request */ 2233 if (call == MAT_INITIAL_MATRIX) { 2234 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2235 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2236 } 2237 2238 PetscFunctionReturn(0); 2239 } 2240 2241 #undef __FUNCT__ 2242 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2243 /*@C 2244 MatMPIAIJSetPreallocation - Preallocates storage for a sparse parallel matrix in AIJ format 2245 (the default parallel PETSc format). For good matrix assembly performance 2246 the user should preallocate the matrix storage by setting the parameters 2247 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2248 performance can be increased by more than a factor of 50. 2249 2250 Collective on MPI_Comm 2251 2252 Input Parameters: 2253 + B - the matrix 2254 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2255 (same value is used for all local rows) 2256 . d_nnz - array containing the number of nonzeros in the various rows of the 2257 DIAGONAL portion of the local submatrix (possibly different for each row) 2258 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2259 The size of this array is equal to the number of local rows, i.e., 'm'. 2260 You must leave room for the diagonal entry even if it is zero. 2261 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2262 submatrix (same value is used for all local rows). 2263 - o_nnz - array containing the number of nonzeros in the various rows of the 2264 OFF-DIAGONAL portion of the local submatrix (possibly different for 2265 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2266 structure. The size of this array is equal to the number 2267 of local rows, i.e., 'm'. 2268 2269 The AIJ format (also called the Yale sparse matrix format or 2270 compressed row storage) is fully compatible with standard Fortran 77 2271 storage. That is, the stored row and column indices can begin at 2272 either one (as in Fortran) or zero. See the users manual for details. 2273 2274 The user MUST specify either the local or global matrix dimensions 2275 (possibly both). 2276 2277 The parallel matrix is partitioned such that the first m0 rows belong to 2278 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2279 to process 2 etc., where m0,m1,m2... are the input parameter 'm'. 2280 2281 The DIAGONAL portion of the local submatrix of a processor can be defined 2282 as the submatrix which is obtained by extracting the part corresponding 2283 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2284 first row that belongs to the processor, and r2 is the last row belonging 2285 to this processor. This is a square mxm matrix. The remaining portion 2286 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2287 2288 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 2289 2290 By default, this format uses inodes (identical nodes) when possible. 2291 We search for consecutive rows with the same nonzero structure, thereby 2292 reusing matrix information to achieve increased efficiency. 2293 2294 Options Database Keys: 2295 + -mat_aij_no_inode - Do not use inodes 2296 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2297 - -mat_aij_oneindex - Internally use indexing starting at 1 2298 rather than 0. Note that when calling MatSetValues(), 2299 the user still MUST index entries starting at 0! 2300 2301 Example usage: 2302 2303 Consider the following 8x8 matrix with 34 non-zero values, that is 2304 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2305 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2306 as follows: 2307 2308 .vb 2309 1 2 0 | 0 3 0 | 0 4 2310 Proc0 0 5 6 | 7 0 0 | 8 0 2311 9 0 10 | 11 0 0 | 12 0 2312 ------------------------------------- 2313 13 0 14 | 15 16 17 | 0 0 2314 Proc1 0 18 0 | 19 20 21 | 0 0 2315 0 0 0 | 22 23 0 | 24 0 2316 ------------------------------------- 2317 Proc2 25 26 27 | 0 0 28 | 29 0 2318 30 0 0 | 31 32 33 | 0 34 2319 .ve 2320 2321 This can be represented as a collection of submatrices as: 2322 2323 .vb 2324 A B C 2325 D E F 2326 G H I 2327 .ve 2328 2329 Where the submatrices A,B,C are owned by proc0, D,E,F are 2330 owned by proc1, G,H,I are owned by proc2. 2331 2332 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2333 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2334 The 'M','N' parameters are 8,8, and have the same values on all procs. 2335 2336 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2337 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2338 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2339 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2340 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 2341 matrix, and [DF] as another SeqAIJ matrix. 2342 2343 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2344 allocated for every row of the local diagonal submatrix, and o_nz 2345 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2346 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2347 local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices. 2348 In this case, the values of d_nz,o_nz are: 2349 .vb 2350 proc0 : d_nz = 2, o_nz = 2 2351 proc1 : d_nz = 3, o_nz = 2 2352 proc2 : d_nz = 1, o_nz = 4 2353 .ve 2354 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2355 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2356 for proc2, i.e., we are using 12+15+10=37 storage locations to store 2357 34 values. 2358 2359 When the d_nnz, o_nnz parameters are specified, the storage is specified 2360 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2361 In the above case the values for d_nnz,o_nnz are: 2362 .vb 2363 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2364 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2365 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2366 .ve 2367 Here the space allocated is the sum of all of the above values, i.e., 34, and 2368 hence preallocation is perfect.
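   As a rough sketch of the calling sequence (illustrative only; error checking is
   omitted and comm, m, n, M, N and the d_nnz/o_nnz arrays are assumed to have been
   set up by the caller), the preallocation call is made right after the type is set:

.vb
      Mat A;
      ierr = MatCreate(comm,m,n,M,N,&A);
      ierr = MatSetType(A,MATMPIAIJ);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
      /* ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve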
2369 2370 Level: intermediate 2371 2372 .keywords: matrix, aij, compressed row, sparse, parallel 2373 2374 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2375 @*/ 2376 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 2377 { 2378 int ierr,(*f)(Mat,int,int*,int,int*); 2379 2380 PetscFunctionBegin; 2381 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2382 if (f) { 2383 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2384 } 2385 PetscFunctionReturn(0); 2386 } 2387 2388 #undef __FUNCT__ 2389 #define __FUNCT__ "MatCreateMPIAIJ" 2390 /*@C 2391 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2392 (the default parallel PETSc format). For good matrix assembly performance 2393 the user should preallocate the matrix storage by setting the parameters 2394 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2395 performance can be increased by more than a factor of 50. 2396 2397 Collective on MPI_Comm 2398 2399 Input Parameters: 2400 + comm - MPI communicator 2401 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2402 This value should be the same as the local size used in creating the 2403 y vector for the matrix-vector product y = Ax. 2404 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given). 2405 This value should be the same as the local size used in creating the 2406 x vector for the matrix-vector product y = Ax. For square matrices n is almost always m. 2407 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2408 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2409 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2410 (same value is used for all local rows) 2411 . d_nnz - array containing the number of nonzeros in the various rows of the 2412 DIAGONAL portion of the local submatrix (possibly different for each row) 2413 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2414 The size of this array is equal to the number of local rows, i.e., 'm'. 2415 You must leave room for the diagonal entry even if it is zero. 2416 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2417 submatrix (same value is used for all local rows). 2418 - o_nnz - array containing the number of nonzeros in the various rows of the 2419 OFF-DIAGONAL portion of the local submatrix (possibly different for 2420 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2421 structure. The size of this array is equal to the number 2422 of local rows, i.e., 'm'. 2423 2424 Output Parameter: 2425 . A - the matrix 2426 2427 Notes: 2428 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 2429 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2430 storage requirements for this matrix. 2431 2432 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2433 processor then it must be used on all processors that share the object for 2434 that argument. 2435 2436 The AIJ format (also called the Yale sparse matrix format or 2437 compressed row storage) is fully compatible with standard Fortran 77 2438 storage. That is, the stored row and column indices can begin at 2439 either one (as in Fortran) or zero. See the users manual for details. 2440 2441 The user MUST specify either the local or global matrix dimensions 2442 (possibly both).
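   For instance (an illustrative sketch only; comm, mlocal, nlocal, M, N and A are
   assumed to be declared by the caller), either of the following calls is legal, the
   first specifying the global sizes and the second the local ones:

.vb
      ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);
      ierr = MatCreateMPIAIJ(comm,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE,5,PETSC_NULL,2,PETSC_NULL,&A);
.ve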
2443 2444 The parallel matrix is partitioned such that the first m0 rows belong to 2445 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2446 to process 2 etc., where m0,m1,m2... are the input parameter 'm'. 2447 2448 The DIAGONAL portion of the local submatrix of a processor can be defined 2449 as the submatrix which is obtained by extracting the part corresponding 2450 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2451 first row that belongs to the processor, and r2 is the last row belonging 2452 to this processor. This is a square mxm matrix. The remaining portion 2453 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2454 2455 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 2456 2457 By default, this format uses inodes (identical nodes) when possible. 2458 We search for consecutive rows with the same nonzero structure, thereby 2459 reusing matrix information to achieve increased efficiency. 2460 2461 Options Database Keys: 2462 + -mat_aij_no_inode - Do not use inodes 2463 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2464 - -mat_aij_oneindex - Internally use indexing starting at 1 2465 rather than 0. Note that when calling MatSetValues(), 2466 the user still MUST index entries starting at 0! 2467 2468 2469 Example usage: 2470 2471 Consider the following 8x8 matrix with 34 non-zero values, that is 2472 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2473 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2474 as follows: 2475 2476 .vb 2477 1 2 0 | 0 3 0 | 0 4 2478 Proc0 0 5 6 | 7 0 0 | 8 0 2479 9 0 10 | 11 0 0 | 12 0 2480 ------------------------------------- 2481 13 0 14 | 15 16 17 | 0 0 2482 Proc1 0 18 0 | 19 20 21 | 0 0 2483 0 0 0 | 22 23 0 | 24 0 2484 ------------------------------------- 2485 Proc2 25 26 27 | 0 0 28 | 29 0 2486 30 0 0 | 31 32 33 | 0 34 2487 .ve 2488 2489 This can be represented as a collection of submatrices as: 2490 2491 .vb 2492 A B C 2493 D E F 2494 G H I 2495 .ve 2496 2497 Where the submatrices A,B,C are owned by proc0, D,E,F are 2498 owned by proc1, G,H,I are owned by proc2. 2499 2500 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2501 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2502 The 'M','N' parameters are 8,8, and have the same values on all procs. 2503 2504 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2505 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2506 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2507 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2508 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 2509 matrix, and [DF] as another SeqAIJ matrix. 2510 2511 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2512 allocated for every row of the local diagonal submatrix, and o_nz 2513 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2514 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2515 local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices. 2516 In this case, the values of d_nz,o_nz are: 2517 .vb 2518 proc0 : d_nz = 2, o_nz = 2 2519 proc1 : d_nz = 3, o_nz = 2 2520 proc2 : d_nz = 1, o_nz = 4 2521 .ve 2522 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2523 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2524 for proc2,
i.e., we are using 12+15+10=37 storage locations to store 2525 34 values. 2526 2527 When the d_nnz, o_nnz parameters are specified, the storage is specified 2528 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2529 In the above case the values for d_nnz,o_nnz are: 2530 .vb 2531 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2532 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2533 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2534 .ve 2535 Here the space allocated is the sum of all of the above values, i.e., 34, and 2536 hence preallocation is perfect. 2537 2538 Level: intermediate 2539 2540 .keywords: matrix, aij, compressed row, sparse, parallel 2541 2542 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2543 @*/ 2544 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2545 { 2546 int ierr,size; 2547 2548 PetscFunctionBegin; 2549 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2550 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2551 if (size > 1) { 2552 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2553 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2554 } else { 2555 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2556 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2557 } 2558 PetscFunctionReturn(0); 2559 } 2560 2561 #undef __FUNCT__ 2562 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2563 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap) 2564 { 2565 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2566 PetscFunctionBegin; 2567 *Ad = a->A; 2568 *Ao = a->B; 2569 *colmap = a->garray; 2570 PetscFunctionReturn(0); 2571 } 2572 2573 #undef __FUNCT__ 2574 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2575 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2576 { 2577 int ierr,i; 2578 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2579 2580 PetscFunctionBegin; 2581 if (coloring->ctype == IS_COLORING_LOCAL) { 2582 ISColoringValue *allcolors,*colors; 2583 ISColoring ocoloring; 2584 2585 /* set coloring for diagonal portion */ 2586 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2587 2588 /* set coloring for off-diagonal portion */ 2589 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2590 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2591 for (i=0; i<a->B->n; i++) { 2592 colors[i] = allcolors[a->garray[i]]; 2593 } 2594 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2595 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2596 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2597 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2598 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2599 ISColoringValue *colors; 2600 int *larray; 2601 ISColoring ocoloring; 2602 2603 /* set coloring for diagonal portion */ 2604 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2605 for (i=0; i<a->A->n; i++) { 2606 larray[i] = i + a->cstart; 2607 } 2608 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2609 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2610 for (i=0; i<a->A->n; i++) { 2611 colors[i] = coloring->colors[larray[i]]; 2612 } 2613 ierr = PetscFree(larray);CHKERRQ(ierr); 2614 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2615 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2616 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2617 2618 /*
set coloring for off-diagonal portion */ 2619 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2620 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2621 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2622 for (i=0; i<a->B->n; i++) { 2623 colors[i] = coloring->colors[larray[i]]; 2624 } 2625 ierr = PetscFree(larray);CHKERRQ(ierr); 2626 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2627 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2628 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2629 } else { 2630 SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype); 2631 } 2632 2633 PetscFunctionReturn(0); 2634 } 2635 2636 #undef __FUNCT__ 2637 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2638 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2639 { 2640 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2641 int ierr; 2642 2643 PetscFunctionBegin; 2644 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2645 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2646 PetscFunctionReturn(0); 2647 } 2648 2649 #undef __FUNCT__ 2650 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2651 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2652 { 2653 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2654 int ierr; 2655 2656 PetscFunctionBegin; 2657 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2658 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2659 PetscFunctionReturn(0); 2660 } 2661 2662 #undef __FUNCT__ 2663 #define __FUNCT__ "MatMerge" 2664 /*@C 2665 MatMerge - Creates a single large parallel PETSc matrix by concatenating the sequential 2666 matrices supplied by each processor 2667 2668 Collective on MPI_Comm 2669 2670 Input Parameters: 2671 + comm - the communicator that the parallel matrix will live on 2672 - inmat - the input sequential matrix (one per processor) 2673 2674 Output Parameter: 2675 . outmat - the parallel matrix generated 2676 2677 Level: advanced 2678 2679 Notes: The number of columns in EACH of the separate input matrices 2680 MUST be the same.
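   A typical use (sketch only; mlocal, N, nnz and comm are assumed to be set up by the
   calling code) is for each processor to assemble its rows as a SeqAIJ matrix and then
   merge them into one parallel matrix:

.vb
      Mat localmat,globalmat;
      ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,N,0,nnz,&localmat);
      /* ... MatSetValues() and MatAssemblyBegin()/MatAssemblyEnd() on localmat ... */
      ierr = MatMerge(comm,localmat,&globalmat);  /* note: MatMerge() destroys localmat */
.ve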
2681 2682 @*/ 2683 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat) 2684 { 2685 int ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz; 2686 PetscScalar *values; 2687 PetscMap columnmap,rowmap; 2688 2689 PetscFunctionBegin; 2690 2691 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr); 2692 2693 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2694 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2695 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr); 2696 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2697 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2698 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2699 2700 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2701 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2702 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2703 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2704 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2705 2706 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2707 for (i=0;i<m;i++) { 2708 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2709 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2710 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2711 } 2712 ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr); 2713 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2714 2715 for (i=0;i<m;i++) { 2716 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2717 I = i + rstart; 2718 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2719 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2720 } 2721 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2722 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2723 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2724 2725 PetscFunctionReturn(0); 2726 } 2727 2728 #undef __FUNCT__ 2729 #define __FUNCT__ "MatFileSplit" 2730 int MatFileSplit(Mat A,char *outfile) 2731 { 2732 int ierr,rank,len,m,N,i,rstart,*indx,nnz; 2733 PetscViewer out; 2734 char *name; 2735 Mat B; 2736 PetscScalar *values; 2737 2738 PetscFunctionBegin; 2739 2740 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2741 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2742 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr); 2743 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2744 for (i=0;i<m;i++) { 2745 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2746 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2747 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2748 } 2749 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2750 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2751 2752 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2753 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2754 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2755 sprintf(name,"%s.%d",outfile,rank); 2756 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr); 2757 ierr = PetscFree(name); 2758 ierr = MatView(B,out);CHKERRQ(ierr); 2759 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2760 ierr = MatDestroy(B);CHKERRQ(ierr); 2761 PetscFunctionReturn(0); 2762 } 2763