/*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
EXTERN int DisAssemble_MPIAIJ(Mat);
EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatPrintHelp_SeqAIJ(Mat);

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
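/*
   A note on the colmap convention (added commentary, not part of the original source):
   entries are stored shifted by +1, and looked up with a matching -1, so that a value
   of 0 can mean "this global column has no local counterpart in B".  For example, a
   lookup later in this file does, in sketch form,

      col = aij->colmap[global_col] - 1;                         (array case)
      PetscTableFind(aij->colmap,global_col+1,&col); col--;      (PETSC_USE_CTABLE case)

   and a resulting col < 0 signals that the column is not yet present locally.
*/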
#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}
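/*
   Added commentary (not part of the original source): both macros above implement the
   same insertion scheme on a compressed-row (AIJ) structure.  A short binary search
   narrows the window, a linear scan finds the exact column, an existing entry is added
   to or overwritten according to addv, and if the row is full (nrow >= rmax) the whole
   matrix storage is reallocated with CHUNKSIZE extra slots spliced in after the current
   row before the new entry is shifted into place.
*/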
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar value;
  int         ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int         cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth  roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  int         *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar *aa = a->a;
  PetscTruth  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat         B = aij->B;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  int         *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar *ba = b->a;

  int         *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  int         nonew = a->nonew,shift=0;
  PetscScalar *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
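/*
   A minimal usage sketch (added commentary, not part of the original source; grow, gcol
   and val are hypothetical caller variables): rows owned by this process go straight
   into the local A/B blocks above, anything else is stashed and exchanged during
   assembly, so values for any global (row,col) may be set from any process:

      ierr = MatSetValues(mat,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/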
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int        cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,nstash,reallocs;
  InsertMode addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}
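/*
   Added commentary (not part of the original source): the MPI_Allreduce with MPI_BOR
   above works because INSERT_VALUES and ADD_VALUES are distinct nonzero values whose
   bitwise OR differs from either one alone, so the reduced result equals
   ADD_VALUES|INSERT_VALUES exactly when some processes inserted while others added,
   which is the mixed-mode case that must be rejected.
*/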
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)aij->A->data,*b = (Mat_SeqAIJ *)aij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;
#if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_MUMPS)
  PetscTruth  flag;
#endif

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

#if defined(PETSC_HAVE_SUPERLUDIST)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
#endif

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); }
#endif

#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseMUMPS_MPIAIJ(mat);CHKERRQ(ierr); }
#endif
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
{
  Mat_MPIAIJ  *l = (Mat_MPIAIJ*)A->data;
  int         i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int         *nprocs,j,idx,nsends,row;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;
  PetscTruth  found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);
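  /*
     Added commentary (not part of the original source): the exchange above is the usual
     PETSc rendezvous pattern -- each process bins the rows it wants zeroed by owner,
     PetscMaxSum() tells every process how many messages to expect and how long the
     longest one is, and the nonblocking sends/receives deliver the global row numbers,
     which are converted to local numbers (values[j] - base) before the zeroing below.
  */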
  /*
     Zero the required rows.  If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory, causing lots of mallocs and
     slowing things down.

     Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsSymmetric_MPIAIJ"
int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f)
{
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat        Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  MatType    type;
  IS         Me,Notme;
  int        M,N,first,last,*notme,i, ierr;

  PetscFunctionBegin;

  /* Compatible types */
  ierr = MatGetType(Bmat,&type); CHKERRQ(ierr);
  ierr = PetscStrcmp(type,MATMPIAIJ,f); CHKERRQ(ierr);
  if (!*f) SETERRQ(1,"Second matrix needs to be MPIAIJ too");

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsSymmetric(Adia,Bdia,f); CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(int),&notme); CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs); CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs); CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsSymmetric(Aoff,Boff,f); CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs); CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs); CHKERRQ(ierr);
  ierr = ISDestroy(Me); CHKERRQ(ierr);
  ierr = ISDestroy(Notme); CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);
extern int MatFactorInfo_Spooles(Mat,PetscViewer);
extern int MatFactorInfo_MUMPS(Mat,PetscViewer);
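/*
   Added commentary (not part of the original source): MatView_MPIAIJ_Binary() below
   writes the matrix in the standard PETSc binary format -- a 4-int header
   (MAT_FILE_COOKIE, M, N, total nonzero count), then every row length, then all
   column indices, then all values.  Process 0 does the writing; every other process
   sends it its row lengths, column indices and values in turn.
*/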
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ  *B = (Mat_SeqAIJ*)aij->B->data;
  int         nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
  int         nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (rank == 0) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  int               ierr,rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,isascii,flg,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
#if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE)
      ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE)
      ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS) && !defined(PETSC_USE_SINGLE)
      ierr = MatFactorInfo_MUMPS(mat,viewer);CHKERRQ(ierr);
#endif
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
    ct = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (isascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

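/*
   Added commentary (not part of the original source): MatRelax_MPIAIJ() below only
   supports the "local" SOR variants.  Each outer iteration scatters the current global
   solution into the local work vector, forms bb1 = bb - B*x_off (the right-hand side
   corrected for the off-processor coupling), and then runs the sequential relaxation
   on the diagonal block A with bb1.
*/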
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  int         ierr;
  Vec         bb1;
  PetscScalar mone=-1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Mat        A = mat->A,B = mat->B;
  int        ierr;
  PetscReal  isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int         i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int         nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int         *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int        max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  int         ierr,i,j,cstart = aij->cstart;
  PetscReal   sum = 0.0;
  PetscScalar *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      int       *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ  *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ  *Aloc = (Mat_SeqAIJ*)a->A->data;
  int         ierr;
  int         M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat         B;
  PetscScalar *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
  ct = cols;
  for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
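/*
   Added commentary (not part of the original source): the transpose above is built by
   inserting each locally owned row of the A and B blocks as a *column* of the new
   N x M matrix (note the swapped row/column arguments to MatSetValues), letting the
   usual stash/assembly machinery deliver each entry to whichever process owns it in
   the transposed layout.
*/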
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        a = aij->A,b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIAIJ"
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat        a,b,c,d;
  PetscTruth flg;
  int        ierr;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscTruth flg;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
  if (str != SAME_NONZERO_PATTERN || !flg) {
If need be, we can provide a more 1545 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1546 then copying the submatrices */ 1547 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1548 } else { 1549 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1550 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1551 } 1552 PetscFunctionReturn(0); 1553 } 1554 1555 #undef __FUNCT__ 1556 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1557 int MatSetUpPreallocation_MPIAIJ(Mat A) 1558 { 1559 int ierr; 1560 1561 PetscFunctionBegin; 1562 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1563 PetscFunctionReturn(0); 1564 } 1565 1566 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1567 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1568 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1569 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **); 1570 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1571 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1572 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*); 1573 #endif 1574 1575 #include "petscblaslapack.h" 1576 extern int MatAXPY_SeqAIJ(PetscScalar *,Mat,Mat,MatStructure); 1577 #undef __FUNCT__ 1578 #define __FUNCT__ "MatAXPY_MPIAIJ" 1579 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1580 { 1581 int ierr,one=1,i; 1582 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1583 Mat_SeqAIJ *x,*y; 1584 1585 PetscFunctionBegin; 1586 if (str == SAME_NONZERO_PATTERN) { 1587 x = (Mat_SeqAIJ *)xx->A->data; 1588 y = (Mat_SeqAIJ *)yy->A->data; 1589 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1590 x = (Mat_SeqAIJ *)xx->B->data; 1591 y = (Mat_SeqAIJ *)yy->B->data; 1592 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1593 } else if (str == SUBSET_NONZERO_PATTERN) { 1594 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1595 1596 x = (Mat_SeqAIJ *)xx->B->data; 1597 y = (Mat_SeqAIJ *)yy->B->data; 1598 if (y->xtoy && y->XtoY != xx->B) { 1599 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1600 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1601 } 1602 if (!y->xtoy) { /* get xtoy */ 1603 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1604 y->XtoY = xx->B; 1605 } 1606 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1607 } else { 1608 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1609 } 1610 PetscFunctionReturn(0); 1611 } 1612 1613 /* -------------------------------------------------------------------*/ 1614 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1615 MatGetRow_MPIAIJ, 1616 MatRestoreRow_MPIAIJ, 1617 MatMult_MPIAIJ, 1618 MatMultAdd_MPIAIJ, 1619 MatMultTranspose_MPIAIJ, 1620 MatMultTransposeAdd_MPIAIJ, 1621 0, 1622 0, 1623 0, 1624 0, 1625 0, 1626 0, 1627 MatRelax_MPIAIJ, 1628 MatTranspose_MPIAIJ, 1629 MatGetInfo_MPIAIJ, 1630 MatEqual_MPIAIJ, 1631 MatGetDiagonal_MPIAIJ, 1632 MatDiagonalScale_MPIAIJ, 1633 MatNorm_MPIAIJ, 1634 MatAssemblyBegin_MPIAIJ, 1635 MatAssemblyEnd_MPIAIJ, 1636 0, 1637 MatSetOption_MPIAIJ, 1638 MatZeroEntries_MPIAIJ, 1639 MatZeroRows_MPIAIJ, 1640 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1641 MatLUFactorSymbolic_MPIAIJ_TFS, 1642 #else 1643 0, 1644 #endif 1645 0, 1646 0, 1647 0, 1648 MatSetUpPreallocation_MPIAIJ, 1649 0, 1650 0, 1651 0, 1652 0, 1653 MatDuplicate_MPIAIJ, 1654 0, 1655 0, 1656 0, 1657 0, 1658 MatAXPY_MPIAIJ, 1659 MatGetSubMatrices_MPIAIJ, 1660 
MatIncreaseOverlap_MPIAIJ, 1661 MatGetValues_MPIAIJ, 1662 MatCopy_MPIAIJ, 1663 MatPrintHelp_MPIAIJ, 1664 MatScale_MPIAIJ, 1665 0, 1666 0, 1667 0, 1668 MatGetBlockSize_MPIAIJ, 1669 0, 1670 0, 1671 0, 1672 0, 1673 MatFDColoringCreate_MPIAIJ, 1674 0, 1675 MatSetUnfactored_MPIAIJ, 1676 0, 1677 0, 1678 MatGetSubMatrix_MPIAIJ, 1679 MatDestroy_MPIAIJ, 1680 MatView_MPIAIJ, 1681 MatGetPetscMaps_Petsc, 1682 0, 1683 0, 1684 0, 1685 0, 1686 0, 1687 0, 1688 0, 1689 0, 1690 MatSetColoring_MPIAIJ, 1691 MatSetValuesAdic_MPIAIJ, 1692 MatSetValuesAdifor_MPIAIJ 1693 }; 1694 1695 /* ----------------------------------------------------------------------------------------*/ 1696 1697 EXTERN_C_BEGIN 1698 #undef __FUNCT__ 1699 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1700 int MatStoreValues_MPIAIJ(Mat mat) 1701 { 1702 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1703 int ierr; 1704 1705 PetscFunctionBegin; 1706 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1707 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1708 PetscFunctionReturn(0); 1709 } 1710 EXTERN_C_END 1711 1712 EXTERN_C_BEGIN 1713 #undef __FUNCT__ 1714 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1715 int MatRetrieveValues_MPIAIJ(Mat mat) 1716 { 1717 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1718 int ierr; 1719 1720 PetscFunctionBegin; 1721 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1722 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1723 PetscFunctionReturn(0); 1724 } 1725 EXTERN_C_END 1726 1727 #include "petscpc.h" 1728 EXTERN_C_BEGIN 1729 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1730 EXTERN_C_END 1731 1732 EXTERN_C_BEGIN 1733 #undef __FUNCT__ 1734 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1735 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 1736 { 1737 Mat_MPIAIJ *b; 1738 int ierr,i; 1739 PetscTruth flg2; 1740 1741 PetscFunctionBegin; 1742 B->preallocated = PETSC_TRUE; 1743 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1744 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1745 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1746 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1747 if (d_nnz) { 1748 for (i=0; i<B->m; i++) { 1749 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 1750 } 1751 } 1752 if (o_nnz) { 1753 for (i=0; i<B->m; i++) { 1754 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 1755 } 1756 } 1757 b = (Mat_MPIAIJ*)B->data; 1758 1759 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1760 PetscLogObjectParent(B,b->A); 1761 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1762 PetscLogObjectParent(B,b->B); 1763 1764 PetscFunctionReturn(0); 1765 } 1766 EXTERN_C_END 1767 1768 EXTERN_C_BEGIN 1769 #undef __FUNCT__ 1770 #define __FUNCT__ "MatCreate_MPIAIJ" 1771 int MatCreate_MPIAIJ(Mat B) 1772 { 1773 Mat_MPIAIJ *b; 1774 int ierr,i,size; 1775 1776 PetscFunctionBegin; 1777 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1778 1779 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1780 B->data = (void*)b; 1781 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1782 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1783 B->factor = 0; 1784 B->assembled = PETSC_FALSE; 1785 B->mapping = 0; 1786 1787 B->insertmode = 
NOT_SET_VALUES; 1788 b->size = size; 1789 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1790 1791 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1792 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1793 1794 /* the information in the maps duplicates the information computed below, eventually 1795 we should remove the duplicate information that is not contained in the maps */ 1796 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1797 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1798 1799 /* build local table of row and column ownerships */ 1800 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1801 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1802 b->cowners = b->rowners + b->size + 2; 1803 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1804 b->rowners[0] = 0; 1805 for (i=2; i<=b->size; i++) { 1806 b->rowners[i] += b->rowners[i-1]; 1807 } 1808 b->rstart = b->rowners[b->rank]; 1809 b->rend = b->rowners[b->rank+1]; 1810 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1811 b->cowners[0] = 0; 1812 for (i=2; i<=b->size; i++) { 1813 b->cowners[i] += b->cowners[i-1]; 1814 } 1815 b->cstart = b->cowners[b->rank]; 1816 b->cend = b->cowners[b->rank+1]; 1817 1818 /* build cache for off array entries formed */ 1819 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1820 b->donotstash = PETSC_FALSE; 1821 b->colmap = 0; 1822 b->garray = 0; 1823 b->roworiented = PETSC_TRUE; 1824 1825 /* stuff used for matrix vector multiply */ 1826 b->lvec = PETSC_NULL; 1827 b->Mvctx = PETSC_NULL; 1828 1829 /* stuff for MatGetRow() */ 1830 b->rowindices = 0; 1831 b->rowvalues = 0; 1832 b->getrowactive = PETSC_FALSE; 1833 1834 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1835 "MatStoreValues_MPIAIJ", 1836 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1837 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1838 "MatRetrieveValues_MPIAIJ", 1839 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1840 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1841 "MatGetDiagonalBlock_MPIAIJ", 1842 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1843 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C", 1844 "MatIsSymmetric_MPIAIJ", 1845 MatIsSymmetric_MPIAIJ); CHKERRQ(ierr); 1846 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 1847 "MatMPIAIJSetPreallocation_MPIAIJ", 1848 MatMPIAIJSetPreallocation_MPIAIJ); CHKERRQ(ierr); 1849 PetscFunctionReturn(0); 1850 } 1851 EXTERN_C_END 1852 1853 #undef __FUNCT__ 1854 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1855 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1856 { 1857 Mat mat; 1858 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1859 int ierr; 1860 1861 PetscFunctionBegin; 1862 *newmat = 0; 1863 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1864 ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1865 a = (Mat_MPIAIJ*)mat->data; 1866 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1867 mat->factor = matin->factor; 1868 mat->assembled = PETSC_TRUE; 1869 mat->insertmode = NOT_SET_VALUES; 1870 mat->preallocated = PETSC_TRUE; 1871 1872 a->rstart = oldmat->rstart; 1873 a->rend = oldmat->rend; 1874 a->cstart = oldmat->cstart; 1875 a->cend = 
oldmat->cend; 1876 a->size = oldmat->size; 1877 a->rank = oldmat->rank; 1878 a->donotstash = oldmat->donotstash; 1879 a->roworiented = oldmat->roworiented; 1880 a->rowindices = 0; 1881 a->rowvalues = 0; 1882 a->getrowactive = PETSC_FALSE; 1883 1884 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1885 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1886 if (oldmat->colmap) { 1887 #if defined (PETSC_USE_CTABLE) 1888 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1889 #else 1890 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1891 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1892 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1893 #endif 1894 } else a->colmap = 0; 1895 if (oldmat->garray) { 1896 int len; 1897 len = oldmat->B->n; 1898 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1899 PetscLogObjectMemory(mat,len*sizeof(int)); 1900 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1901 } else a->garray = 0; 1902 1903 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1904 PetscLogObjectParent(mat,a->lvec); 1905 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1906 PetscLogObjectParent(mat,a->Mvctx); 1907 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1908 PetscLogObjectParent(mat,a->A); 1909 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1910 PetscLogObjectParent(mat,a->B); 1911 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1912 *newmat = mat; 1913 PetscFunctionReturn(0); 1914 } 1915 1916 #include "petscsys.h" 1917 1918 EXTERN_C_BEGIN 1919 #undef __FUNCT__ 1920 #define __FUNCT__ "MatLoad_MPIAIJ" 1921 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat) 1922 { 1923 Mat A; 1924 PetscScalar *vals,*svals; 1925 MPI_Comm comm = ((PetscObject)viewer)->comm; 1926 MPI_Status status; 1927 int i,nz,ierr,j,rstart,rend,fd; 1928 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1929 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1930 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1931 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_MUMPS) 1932 PetscTruth flag; 1933 #endif 1934 1935 PetscFunctionBegin; 1936 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1937 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1938 if (!rank) { 1939 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1940 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1941 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1942 if (header[3] < 0) { 1943 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1944 } 1945 } 1946 1947 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1948 M = header[1]; N = header[2]; 1949 /* determine ownership of all rows */ 1950 m = M/size + ((M % size) > rank); 1951 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1952 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1953 rowners[0] = 0; 1954 for (i=2; i<=size; i++) { 1955 rowners[i] += rowners[i-1]; 1956 } 1957 rstart = rowners[rank]; 1958 rend = rowners[rank+1]; 1959 1960 /* distribute row lengths to all processors */ 1961 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1962 offlens = ourlens + (rend-rstart); 
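/* process 0 reads the row lengths of the entire matrix from the binary file and, with
   MPI_Scatterv(), delivers to each process the lengths of only the rows that process owns;
   the remaining processes simply act as receivers in the scatter */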
1963 if (!rank) { 1964 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1965 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1966 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1967 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1968 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1969 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1970 } else { 1971 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1972 } 1973 1974 if (!rank) { 1975 /* calculate the number of nonzeros on each processor */ 1976 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1977 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1978 for (i=0; i<size; i++) { 1979 for (j=rowners[i]; j< rowners[i+1]; j++) { 1980 procsnz[i] += rowlengths[j]; 1981 } 1982 } 1983 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1984 1985 /* determine max buffer needed and allocate it */ 1986 maxnz = 0; 1987 for (i=0; i<size; i++) { 1988 maxnz = PetscMax(maxnz,procsnz[i]); 1989 } 1990 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1991 1992 /* read in my part of the matrix column indices */ 1993 nz = procsnz[0]; 1994 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1995 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1996 1997 /* read in every one elses and ship off */ 1998 for (i=1; i<size; i++) { 1999 nz = procsnz[i]; 2000 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2001 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2002 } 2003 ierr = PetscFree(cols);CHKERRQ(ierr); 2004 } else { 2005 /* determine buffer space needed for message */ 2006 nz = 0; 2007 for (i=0; i<m; i++) { 2008 nz += ourlens[i]; 2009 } 2010 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 2011 2012 /* receive message of column indices*/ 2013 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2014 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2015 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2016 } 2017 2018 /* determine column ownership if matrix is not square */ 2019 if (N != M) { 2020 n = N/size + ((N % size) > rank); 2021 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2022 cstart = cend - n; 2023 } else { 2024 cstart = rstart; 2025 cend = rend; 2026 n = cend - cstart; 2027 } 2028 2029 /* loop over local rows, determining number of off diagonal entries */ 2030 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 2031 jj = 0; 2032 for (i=0; i<m; i++) { 2033 for (j=0; j<ourlens[i]; j++) { 2034 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2035 jj++; 2036 } 2037 } 2038 2039 /* create our matrix */ 2040 for (i=0; i<m; i++) { 2041 ourlens[i] -= offlens[i]; 2042 } 2043 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 2044 A = *newmat; 2045 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2046 for (i=0; i<m; i++) { 2047 ourlens[i] += offlens[i]; 2048 } 2049 2050 if (!rank) { 2051 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2052 2053 /* read in my part of the matrix numerical values */ 2054 nz = procsnz[0]; 2055 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2056 2057 /* insert into matrix */ 2058 jj = rstart; 2059 smycols = mycols; 2060 svals = vals; 2061 for (i=0; i<m; i++) { 2062 ierr = 
MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2063 smycols += ourlens[i]; 2064 svals += ourlens[i]; 2065 jj++; 2066 } 2067 2068 /* read in other processors and ship out */ 2069 for (i=1; i<size; i++) { 2070 nz = procsnz[i]; 2071 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2072 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2073 } 2074 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2075 } else { 2076 /* receive numeric values */ 2077 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2078 2079 /* receive message of values*/ 2080 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2081 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2082 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2083 2084 /* insert into matrix */ 2085 jj = rstart; 2086 smycols = mycols; 2087 svals = vals; 2088 for (i=0; i<m; i++) { 2089 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2090 smycols += ourlens[i]; 2091 svals += ourlens[i]; 2092 jj++; 2093 } 2094 } 2095 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2096 ierr = PetscFree(vals);CHKERRQ(ierr); 2097 ierr = PetscFree(mycols);CHKERRQ(ierr); 2098 ierr = PetscFree(rowners);CHKERRQ(ierr); 2099 2100 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2101 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2102 #if defined(PETSC_HAVE_SPOOLES) 2103 ierr = PetscOptionsHasName(A->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr); 2104 if (flag) { 2105 if (size == 1) { 2106 ierr = MatUseSpooles_SeqAIJ(A);CHKERRQ(ierr); 2107 } else { 2108 ierr = MatUseSpooles_MPIAIJ(A);CHKERRQ(ierr); 2109 } 2110 } 2111 #endif 2112 #if defined(PETSC_HAVE_SUPERLUDIST) 2113 ierr = PetscOptionsHasName(A->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr); 2114 if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr); } 2115 #endif 2116 #if defined(PETSC_HAVE_MUMPS) 2117 ierr = PetscOptionsHasName(A->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr); 2118 if (flag) { ierr = MatUseMUMPS_MPIAIJ(A);CHKERRQ(ierr); } 2119 #endif 2120 PetscFunctionReturn(0); 2121 } 2122 EXTERN_C_END 2123 2124 #undef __FUNCT__ 2125 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2126 /* 2127 Not great since it makes two copies of the submatrix, first an SeqAIJ 2128 in local and then by concatenating the local matrices the end result. 
2129 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2130 */ 2131 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2132 { 2133 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2134 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2135 Mat *local,M,Mreuse; 2136 PetscScalar *vwork,*aa; 2137 MPI_Comm comm = mat->comm; 2138 Mat_SeqAIJ *aij; 2139 2140 2141 PetscFunctionBegin; 2142 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2143 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2144 2145 if (call == MAT_REUSE_MATRIX) { 2146 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2147 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 2148 local = &Mreuse; 2149 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2150 } else { 2151 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2152 Mreuse = *local; 2153 ierr = PetscFree(local);CHKERRQ(ierr); 2154 } 2155 2156 /* 2157 m - number of local rows 2158 n - number of columns (same on all processors) 2159 rstart - first row in new global matrix generated 2160 */ 2161 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2162 if (call == MAT_INITIAL_MATRIX) { 2163 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2164 ii = aij->i; 2165 jj = aij->j; 2166 2167 /* 2168 Determine the number of non-zeros in the diagonal and off-diagonal 2169 portions of the matrix in order to do correct preallocation 2170 */ 2171 2172 /* first get start and end of "diagonal" columns */ 2173 if (csize == PETSC_DECIDE) { 2174 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2175 if (mglobal == n) { /* square matrix */ 2176 nlocal = m; 2177 } else { 2178 nlocal = n/size + ((n % size) > rank); 2179 } 2180 } else { 2181 nlocal = csize; 2182 } 2183 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2184 rstart = rend - nlocal; 2185 if (rank == size - 1 && rend != n) { 2186 SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n); 2187 } 2188 2189 /* next, compute all the lengths */ 2190 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2191 olens = dlens + m; 2192 for (i=0; i<m; i++) { 2193 jend = ii[i+1] - ii[i]; 2194 olen = 0; 2195 dlen = 0; 2196 for (j=0; j<jend; j++) { 2197 if (*jj < rstart || *jj >= rend) olen++; 2198 else dlen++; 2199 jj++; 2200 } 2201 olens[i] = olen; 2202 dlens[i] = dlen; 2203 } 2204 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2205 ierr = PetscFree(dlens);CHKERRQ(ierr); 2206 } else { 2207 int ml,nl; 2208 2209 M = *newmat; 2210 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2211 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2212 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2213 /* 2214 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2215 rather than the slower MatSetValues(). 
2216 */ 2217 M->was_assembled = PETSC_TRUE; 2218 M->assembled = PETSC_FALSE; 2219 } 2220 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2221 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2222 ii = aij->i; 2223 jj = aij->j; 2224 aa = aij->a; 2225 for (i=0; i<m; i++) { 2226 row = rstart + i; 2227 nz = ii[i+1] - ii[i]; 2228 cwork = jj; jj += nz; 2229 vwork = aa; aa += nz; 2230 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2231 } 2232 2233 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2234 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2235 *newmat = M; 2236 2237 /* save submatrix used in processor for next request */ 2238 if (call == MAT_INITIAL_MATRIX) { 2239 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2240 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2241 } 2242 2243 PetscFunctionReturn(0); 2244 } 2245 2246 #undef __FUNCT__ 2247 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2248 /*@C 2249 MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format 2250 (the default parallel PETSc format). For good matrix assembly performance 2251 the user should preallocate the matrix storage by setting the parameters 2252 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2253 performance can be increased by more than a factor of 50. 2254 2255 Collective on MPI_Comm 2256 2257 Input Parameters: 2258 + A - the matrix 2259 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2260 (same value is used for all local rows) 2261 . d_nnz - array containing the number of nonzeros in the various rows of the 2262 DIAGONAL portion of the local submatrix (possibly different for each row) 2263 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2264 The size of this array is equal to the number of local rows, i.e 'm'. 2265 You must leave room for the diagonal entry even if it is zero. 2266 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2267 submatrix (same value is used for all local rows). 2268 - o_nnz - array containing the number of nonzeros in the various rows of the 2269 OFF-DIAGONAL portion of the local submatrix (possibly different for 2270 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2271 structure. The size of this array is equal to the number 2272 of local rows, i.e 'm'. 2273 2274 The AIJ format (also called the Yale sparse matrix format or 2275 compressed row storage), is fully compatible with standard Fortran 77 2276 storage. That is, the stored row and column indices can begin at 2277 either one (as in Fortran) or zero. See the users manual for details. 2278 2279 The user MUST specify either the local or global matrix dimensions 2280 (possibly both). 2281 2282 The parallel matrix is partitioned such that the first m0 rows belong to 2283 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2284 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2285 2286 The DIAGONAL portion of the local submatrix of a processor can be defined 2287 as the submatrix which is obtained by extraction the part corresponding 2288 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2289 first row that belongs to the processor, and r2 is the last row belonging 2290 to the this processor. This is a square mxm matrix. The remaining portion 2291 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 
2292 2293 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 2294 2295 By default, this format uses inodes (identical nodes) when possible. 2296 We search for consecutive rows with the same nonzero structure, thereby 2297 reusing matrix information to achieve increased efficiency. 2298 2299 Options Database Keys: 2300 + -mat_aij_no_inode - Do not use inodes 2301 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2302 - -mat_aij_oneindex - Internally use indexing starting at 1 2303 rather than 0. Note that when calling MatSetValues(), 2304 the user still MUST index entries starting at 0! 2305 2306 Example usage: 2307 2308 Consider the following 8x8 matrix with 34 non-zero values, that is 2309 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2310 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2311 as follows: 2312 2313 .vb 2314 1 2 0 | 0 3 0 | 0 4 2315 Proc0 0 5 6 | 7 0 0 | 8 0 2316 9 0 10 | 11 0 0 | 12 0 2317 ------------------------------------- 2318 13 0 14 | 15 16 17 | 0 0 2319 Proc1 0 18 0 | 19 20 21 | 0 0 2320 0 0 0 | 22 23 0 | 24 0 2321 ------------------------------------- 2322 Proc2 25 26 27 | 0 0 28 | 29 0 2323 30 0 0 | 31 32 33 | 0 34 2324 .ve 2325 2326 This can be represented as a collection of submatrices as: 2327 2328 .vb 2329 A B C 2330 D E F 2331 G H I 2332 .ve 2333 2334 Where the submatrices A,B,C are owned by proc0, D,E,F are 2335 owned by proc1, G,H,I are owned by proc2. 2336 2337 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2338 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2339 The 'M','N' parameters are 8,8, and have the same values on all procs. 2340 2341 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2342 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2343 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2344 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2345 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 2346 matrix, and [DF] as another SeqAIJ matrix. 2347 2348 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2349 allocated for every row of the local diagonal submatrix, and o_nz 2350 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2351 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 2352 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 2353 In this case, the values of d_nz,o_nz are: 2354 .vb 2355 proc0 : d_nz = 2, o_nz = 2 2356 proc1 : d_nz = 3, o_nz = 2 2357 proc2 : d_nz = 1, o_nz = 4 2358 .ve 2359 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2360 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2361 for proc2, i.e. we are using 12+15+10=37 storage locations to store 2362 34 values. 2363 2364 When the d_nnz, o_nnz parameters are specified, the storage is specified 2365 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2366 In the above case the values for d_nnz,o_nnz are: 2367 .vb 2368 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2369 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2370 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2371 .ve 2372 Here the space allocated is the sum of all of the above values, i.e. 34, and 2373 hence the preallocation is exact.
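   A minimal sketch of the corresponding calls made by proc1 in this example, using
   the per-row values above (the variable names, and the use of comm for the
   communicator, are illustrative only; declarations of ierr and comm are omitted):
.vb
      Mat A;
      int d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
      ierr = MatCreate(comm,3,3,8,8,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
   Because d_nnz and o_nnz are supplied, the d_nz and o_nz arguments (0 here) are ignored.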
2374 2375 Level: intermediate 2376 2377 .keywords: matrix, aij, compressed row, sparse, parallel 2378 2379 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2380 @*/ 2381 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 2382 { 2383 int ierr,(*f)(Mat,int,int*,int,int*); 2384 2385 PetscFunctionBegin; 2386 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2387 if (f) { 2388 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2389 } 2390 PetscFunctionReturn(0); 2391 } 2392 2393 #undef __FUNCT__ 2394 #define __FUNCT__ "MatCreateMPIAIJ" 2395 /*@C 2396 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2397 (the default parallel PETSc format). For good matrix assembly performance 2398 the user should preallocate the matrix storage by setting the parameters 2399 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2400 performance can be increased by more than a factor of 50. 2401 2402 Collective on MPI_Comm 2403 2404 Input Parameters: 2405 + comm - MPI communicator 2406 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2407 This value should be the same as the local size used in creating the 2408 y vector for the matrix-vector product y = Ax. 2409 . n - This value should be the same as the local size used in creating the 2410 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2411 calculated if N is given) For square matrices n is almost always m. 2412 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2413 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2414 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2415 (same value is used for all local rows) 2416 . d_nnz - array containing the number of nonzeros in the various rows of the 2417 DIAGONAL portion of the local submatrix (possibly different for each row) 2418 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2419 The size of this array is equal to the number of local rows, i.e 'm'. 2420 You must leave room for the diagonal entry even if it is zero. 2421 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2422 submatrix (same value is used for all local rows). 2423 - o_nnz - array containing the number of nonzeros in the various rows of the 2424 OFF-DIAGONAL portion of the local submatrix (possibly different for 2425 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2426 structure. The size of this array is equal to the number 2427 of local rows, i.e 'm'. 2428 2429 Output Parameter: 2430 . A - the matrix 2431 2432 Notes: 2433 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2434 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2435 storage requirements for this matrix. 2436 2437 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2438 processor than it must be used on all processors that share the object for 2439 that argument. 2440 2441 The AIJ format (also called the Yale sparse matrix format or 2442 compressed row storage), is fully compatible with standard Fortran 77 2443 storage. That is, the stored row and column indices can begin at 2444 either one (as in Fortran) or zero. See the users manual for details. 2445 2446 The user MUST specify either the local or global matrix dimensions 2447 (possibly both). 
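   For instance, in the example below proc1 could make either of the following
   (illustrative) calls; the first specifies the local sizes and lets the global sizes
   be determined, while the second specifies only the global sizes and lets PETSc
   choose the row distribution (which then need not be the 3,3,2 split of the example):
.vb
      ierr = MatCreateMPIAIJ(comm,3,3,PETSC_DETERMINE,PETSC_DETERMINE,3,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
      ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,8,8,3,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve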
2448 2449 The parallel matrix is partitioned such that the first m0 rows belong to 2450 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2451 to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm' on each process. 2452 2453 The DIAGONAL portion of the local submatrix of a processor can be defined 2454 as the submatrix which is obtained by extracting the part corresponding 2455 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2456 first row that belongs to the processor, and r2 is the last row belonging 2457 to this processor. This is a square mxm matrix. The remaining portion 2458 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2459 2460 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 2461 2462 By default, this format uses inodes (identical nodes) when possible. 2463 We search for consecutive rows with the same nonzero structure, thereby 2464 reusing matrix information to achieve increased efficiency. 2465 2466 Options Database Keys: 2467 + -mat_aij_no_inode - Do not use inodes 2468 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2469 - -mat_aij_oneindex - Internally use indexing starting at 1 2470 rather than 0. Note that when calling MatSetValues(), 2471 the user still MUST index entries starting at 0! 2472 2473 2474 Example usage: 2475 2476 Consider the following 8x8 matrix with 34 non-zero values, that is 2477 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2478 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2479 as follows: 2480 2481 .vb 2482 1 2 0 | 0 3 0 | 0 4 2483 Proc0 0 5 6 | 7 0 0 | 8 0 2484 9 0 10 | 11 0 0 | 12 0 2485 ------------------------------------- 2486 13 0 14 | 15 16 17 | 0 0 2487 Proc1 0 18 0 | 19 20 21 | 0 0 2488 0 0 0 | 22 23 0 | 24 0 2489 ------------------------------------- 2490 Proc2 25 26 27 | 0 0 28 | 29 0 2491 30 0 0 | 31 32 33 | 0 34 2492 .ve 2493 2494 This can be represented as a collection of submatrices as: 2495 2496 .vb 2497 A B C 2498 D E F 2499 G H I 2500 .ve 2501 2502 Where the submatrices A,B,C are owned by proc0, D,E,F are 2503 owned by proc1, G,H,I are owned by proc2. 2504 2505 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2506 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2507 The 'M','N' parameters are 8,8, and have the same values on all procs. 2508 2509 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2510 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2511 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2512 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2513 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 2514 matrix, and [DF] as another SeqAIJ matrix. 2515 2516 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2517 allocated for every row of the local diagonal submatrix, and o_nz 2518 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2519 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 2520 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 2521 In this case, the values of d_nz,o_nz are: 2522 .vb 2523 proc0 : d_nz = 2, o_nz = 2 2524 proc1 : d_nz = 3, o_nz = 2 2525 proc2 : d_nz = 1, o_nz = 4 2526 .ve 2527 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2528 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2529 for proc2.
i.e we are using 12+15+10=37 storage locations to store 2530 34 values. 2531 2532 When d_nnz, o_nnz parameters are specified, the storage is specified 2533 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2534 In the above case the values for d_nnz,o_nnz are: 2535 .vb 2536 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2537 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2538 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2539 .ve 2540 Here the space allocated is sum of all the above values i.e 34, and 2541 hence pre-allocation is perfect. 2542 2543 Level: intermediate 2544 2545 .keywords: matrix, aij, compressed row, sparse, parallel 2546 2547 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2548 @*/ 2549 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2550 { 2551 int ierr,size; 2552 2553 PetscFunctionBegin; 2554 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2555 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2556 if (size > 1) { 2557 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2558 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2559 } else { 2560 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2561 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2562 } 2563 PetscFunctionReturn(0); 2564 } 2565 2566 #undef __FUNCT__ 2567 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2568 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap) 2569 { 2570 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2571 PetscFunctionBegin; 2572 *Ad = a->A; 2573 *Ao = a->B; 2574 *colmap = a->garray; 2575 PetscFunctionReturn(0); 2576 } 2577 2578 #undef __FUNCT__ 2579 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2580 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2581 { 2582 int ierr,i; 2583 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2584 2585 PetscFunctionBegin; 2586 if (coloring->ctype == IS_COLORING_LOCAL) { 2587 ISColoringValue *allcolors,*colors; 2588 ISColoring ocoloring; 2589 2590 /* set coloring for diagonal portion */ 2591 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2592 2593 /* set coloring for off-diagonal portion */ 2594 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2595 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2596 for (i=0; i<a->B->n; i++) { 2597 colors[i] = allcolors[a->garray[i]]; 2598 } 2599 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2600 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2601 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2602 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2603 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2604 ISColoringValue *colors; 2605 int *larray; 2606 ISColoring ocoloring; 2607 2608 /* set coloring for diagonal portion */ 2609 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2610 for (i=0; i<a->A->n; i++) { 2611 larray[i] = i + a->cstart; 2612 } 2613 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2614 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2615 for (i=0; i<a->A->n; i++) { 2616 colors[i] = coloring->colors[larray[i]]; 2617 } 2618 ierr = PetscFree(larray);CHKERRQ(ierr); 2619 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2620 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2621 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2622 2623 /* 
set coloring for off-diagonal portion */ 2624 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2625 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2626 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2627 for (i=0; i<a->B->n; i++) { 2628 colors[i] = coloring->colors[larray[i]]; 2629 } 2630 ierr = PetscFree(larray);CHKERRQ(ierr); 2631 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2632 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2633 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2634 } else { 2635 SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype); 2636 } 2637 2638 PetscFunctionReturn(0); 2639 } 2640 2641 #undef __FUNCT__ 2642 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2643 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2644 { 2645 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2646 int ierr; 2647 2648 PetscFunctionBegin; 2649 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2650 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2651 PetscFunctionReturn(0); 2652 } 2653 2654 #undef __FUNCT__ 2655 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2656 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2657 { 2658 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2659 int ierr; 2660 2661 PetscFunctionBegin; 2662 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2663 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2664 PetscFunctionReturn(0); 2665 } 2666 2667 #undef __FUNCT__ 2668 #define __FUNCT__ "MatMerge" 2669 /*@C 2670 MatMerge - Creates a single large parallel PETSc matrix by concatenating the sequential 2671 matrices from each processor 2672 2673 Collective on MPI_Comm 2674 2675 Input Parameters: 2676 + comm - the communicator the parallel matrix will live on 2677 - inmat - the input sequential matrix (one per process) 2678 2679 Output Parameter: 2680 . outmat - the parallel matrix generated 2681 2682 Level: advanced 2683 2684 Notes: The number of columns in EACH of the separate input matrices 2685 MUST be the same.
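   The input matrix inmat is destroyed by this routine once the parallel matrix
   has been assembled, so it cannot be used after the call.

   A minimal usage sketch (the names seqmat and parmat are illustrative only),
   assuming each process has already assembled its own rows in the sequential
   matrix seqmat:
.vb
      ierr = MatMerge(PETSC_COMM_WORLD,seqmat,&parmat);CHKERRQ(ierr);
.ve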
2686 2687 @*/ 2688 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat) 2689 { 2690 int ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz; 2691 PetscScalar *values; 2692 PetscMap columnmap,rowmap; 2693 2694 PetscFunctionBegin; 2695 2696 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr); 2697 2698 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2699 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2700 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr); 2701 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2702 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2703 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2704 2705 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2706 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2707 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2708 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2709 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2710 2711 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2712 for (i=0;i<m;i++) { 2713 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2714 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2715 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2716 } 2717 ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr); 2718 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2719 2720 for (i=0;i<m;i++) { 2721 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2722 I = i + rstart; 2723 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2724 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2725 } 2726 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2727 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2728 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2729 2730 PetscFunctionReturn(0); 2731 } 2732 2733 #undef __FUNCT__ 2734 #define __FUNCT__ "MatFileSplit" 2735 int MatFileSplit(Mat A,char *outfile) 2736 { 2737 int ierr,rank,len,m,N,i,rstart,*indx,nnz; 2738 PetscViewer out; 2739 char *name; 2740 Mat B; 2741 PetscScalar *values; 2742 2743 PetscFunctionBegin; 2744 2745 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2746 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2747 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr); 2748 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2749 for (i=0;i<m;i++) { 2750 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2751 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2752 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2753 } 2754 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2755 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2756 2757 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2758 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2759 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2760 sprintf(name,"%s.%d",outfile,rank); 2761 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr); 2762 ierr = PetscFree(name); 2763 ierr = MatView(B,out);CHKERRQ(ierr); 2764 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2765 ierr = MatDestroy(B);CHKERRQ(ierr); 2766 PetscFunctionReturn(0); 2767 } 2768
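/*
   Illustrative usage sketch for MatFileSplit() (the matrix A and the base name "Amat"
   below are examples only): each process writes its locally owned rows, as a sequential
   matrix, to the binary file <outfile>.<rank>, one file per process.

      ierr = MatFileSplit(A,"Amat");CHKERRQ(ierr);
*/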