1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/ 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/spops.h" 6 7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat); 8 EXTERN int DisAssemble_MPIAIJ(Mat); 9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 12 EXTERN int MatPrintHelp_SeqAIJ(Mat); 13 14 /* 15 Local utility routine that creates a mapping from the global column 16 number to the local number in the off-diagonal part of the local 17 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 18 a slightly higher hash table cost; without it it is not scalable (each processor 19 has an order N integer array but is fast to acess. 20 */ 21 #undef __FUNCT__ 22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 23 int CreateColmap_MPIAIJ_Private(Mat mat) 24 { 25 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 26 int n = aij->B->n,i,ierr; 27 28 PetscFunctionBegin; 29 #if defined (PETSC_USE_CTABLE) 30 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 31 for (i=0; i<n; i++){ 32 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 33 } 34 #else 35 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 36 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 37 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 38 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 39 #endif 40 PetscFunctionReturn(0); 41 } 42 43 #define CHUNKSIZE 15 44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 45 { \ 46 \ 47 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 48 rmax = aimax[row]; nrow = ailen[row]; \ 49 col1 = col - shift; \ 50 \ 51 low = 0; high = nrow; \ 52 while (high-low > 5) { \ 53 t = (low+high)/2; \ 54 if (rp[t] > col) high = t; \ 55 else low = t; \ 56 } \ 57 for (_i=low; _i<high; _i++) { \ 58 if (rp[_i] > col1) break; \ 59 if (rp[_i] == col1) { \ 60 if (addv == ADD_VALUES) ap[_i] += value; \ 61 else ap[_i] = value; \ 62 goto a_noinsert; \ 63 } \ 64 } \ 65 if (nonew == 1) goto a_noinsert; \ 66 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 67 if (nrow >= rmax) { \ 68 /* there is no extra room in row, therefore enlarge */ \ 69 int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 70 PetscScalar *new_a; \ 71 \ 72 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 73 \ 74 /* malloc new storage space */ \ 75 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \ 76 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 77 new_j = (int*)(new_a + new_nz); \ 78 new_i = new_j + new_nz; \ 79 \ 80 /* copy over old data into new slots */ \ 81 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 82 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 83 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 84 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 85 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 86 len*sizeof(int));CHKERRQ(ierr); \ 87 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 88 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 89 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 90 /* free up old matrix storage */ \ 91 \ 92 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 93 if (!a->singlemalloc) { \ 94 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 95 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 96 } \ 97 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 98 a->singlemalloc = PETSC_TRUE; \ 99 \ 100 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 101 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 102 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 103 a->maxnz += CHUNKSIZE; \ 104 a->reallocs++; \ 105 } \ 106 N = nrow++ - 1; a->nz++; \ 107 /* shift up all the later entries in this row */ \ 108 for (ii=N; ii>=_i; ii--) { \ 109 rp[ii+1] = rp[ii]; \ 110 ap[ii+1] = ap[ii]; \ 111 } \ 112 rp[_i] = col1; \ 113 ap[_i] = value; \ 114 a_noinsert: ; \ 115 ailen[row] = nrow; \ 116 } 117 118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 119 { \ 120 \ 121 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 122 rmax = bimax[row]; nrow = bilen[row]; \ 123 col1 = col - shift; \ 124 \ 125 low = 0; high = nrow; \ 126 while (high-low > 5) { \ 127 t = (low+high)/2; \ 128 if (rp[t] > col) high = t; \ 129 else low = t; \ 130 } \ 131 for (_i=low; _i<high; _i++) { \ 132 if (rp[_i] > col1) break; \ 133 if (rp[_i] == col1) { \ 134 if (addv == ADD_VALUES) ap[_i] += value; \ 135 else ap[_i] = value; \ 136 goto b_noinsert; \ 137 } \ 138 } \ 139 if (nonew == 1) goto b_noinsert; \ 140 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 141 if (nrow >= rmax) { \ 142 /* there is no extra room in row, therefore enlarge */ \ 143 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 144 PetscScalar *new_a; \ 145 \ 146 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 147 \ 148 /* malloc new storage space */ \ 149 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 150 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 151 new_j = (int*)(new_a + new_nz); \ 152 new_i = new_j + new_nz; \ 153 \ 154 /* copy over old data into new slots */ \ 155 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 156 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 157 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 158 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 159 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 160 len*sizeof(int));CHKERRQ(ierr); \ 161 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 163 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 164 /* free up old matrix storage */ \ 165 \ 166 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 167 if (!b->singlemalloc) { \ 168 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 169 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 170 } \ 171 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 172 b->singlemalloc = PETSC_TRUE; \ 173 \ 174 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 175 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 176 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 177 b->maxnz += CHUNKSIZE; \ 178 b->reallocs++; \ 179 } \ 180 N = nrow++ - 1; b->nz++; \ 181 /* shift up all the later entries in this row */ \ 182 for (ii=N; ii>=_i; ii--) { \ 183 rp[ii+1] = rp[ii]; \ 184 ap[ii+1] = ap[ii]; \ 185 } \ 186 rp[_i] = col1; \ 187 ap[_i] = value; \ 188 b_noinsert: ; \ 189 bilen[row] = nrow; \ 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatSetValues_MPIAIJ" 194 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 195 { 196 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 197 PetscScalar value; 198 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 199 int cstart = aij->cstart,cend = aij->cend,row,col; 200 PetscTruth roworiented = aij->roworiented; 201 202 /* Some Variables required in the macro */ 203 Mat A = aij->A; 204 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 205 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 206 PetscScalar *aa = a->a; 207 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 208 Mat B = aij->B; 209 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 210 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 211 PetscScalar *ba = b->a; 212 213 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 214 int nonew = a->nonew,shift=0; 215 PetscScalar *ap; 216 217 PetscFunctionBegin; 218 for (i=0; i<m; i++) { 219 if (im[i] < 0) continue; 220 #if defined(PETSC_USE_BOPT_g) 221 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 222 #endif 223 if (im[i] >= rstart && im[i] < rend) { 224 row = im[i] - rstart; 225 for (j=0; j<n; j++) { 226 if (in[j] >= cstart && in[j] < cend){ 227 col = in[j] - cstart; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 if (ignorezeroentries && value == 0.0) continue; 230 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } else if (in[j] < 0) continue; 233 #if defined(PETSC_USE_BOPT_g) 234 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");} 235 #endif 236 else { 237 if (mat->was_assembled) { 238 if (!aij->colmap) { 239 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 240 } 241 #if defined (PETSC_USE_CTABLE) 242 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 243 col--; 244 #else 245 col = aij->colmap[in[j]] - 1; 246 #endif 247 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 248 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 249 col = in[j]; 250 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 251 B = aij->B; 252 b = (Mat_SeqAIJ*)B->data; 253 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 254 ba = b->a; 255 } 256 } else col = in[j]; 257 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 258 if (ignorezeroentries && value == 0.0) continue; 259 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 260 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 261 } 262 } 263 } else { 264 if (!aij->donotstash) { 265 if (roworiented) { 266 if (ignorezeroentries && v[i*n] == 0.0) continue; 267 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 268 } else { 269 if (ignorezeroentries && v[i] == 0.0) continue; 270 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 271 } 272 } 273 } 274 } 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatGetValues_MPIAIJ" 280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 281 { 282 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 283 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 284 int cstart = aij->cstart,cend = aij->cend,row,col; 285 286 PetscFunctionBegin; 287 for (i=0; i<m; i++) { 288 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 289 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 290 if (idxm[i] >= rstart && idxm[i] < rend) { 291 row = idxm[i] - rstart; 292 for (j=0; j<n; j++) { 293 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 294 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 295 if (idxn[j] >= cstart && idxn[j] < cend){ 296 col = idxn[j] - cstart; 297 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 298 } else { 299 if (!aij->colmap) { 300 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 301 } 302 #if defined (PETSC_USE_CTABLE) 303 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 304 col --; 305 #else 306 col = aij->colmap[idxn[j]] - 1; 307 #endif 308 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 309 else { 310 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 311 } 312 } 313 } 314 } else { 315 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 316 } 317 } 318 PetscFunctionReturn(0); 319 } 320 321 #undef __FUNCT__ 322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 324 { 325 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 326 int ierr,nstash,reallocs; 327 InsertMode addv; 328 329 PetscFunctionBegin; 330 if (aij->donotstash) { 331 PetscFunctionReturn(0); 332 } 333 334 /* make sure all processors are either in INSERTMODE or ADDMODE */ 335 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 336 if (addv == (ADD_VALUES|INSERT_VALUES)) { 337 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 338 } 339 mat->insertmode = addv; /* in case this processor had no cache */ 340 341 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 342 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 343 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 344 PetscFunctionReturn(0); 345 } 346 347 348 #undef __FUNCT__ 349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 351 { 352 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 353 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 354 int i,j,rstart,ncols,n,ierr,flg; 355 int *row,*col,other_disassembled; 356 PetscScalar *val; 357 InsertMode addv = mat->insertmode; 358 #if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_MUMPS) 359 PetscTruth flag; 360 #endif 361 362 PetscFunctionBegin; 363 if (!aij->donotstash) { 364 while (1) { 365 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 366 if (!flg) break; 367 368 for (i=0; i<n;) { 369 /* Now identify the consecutive vals belonging to the same row */ 370 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 371 if (j < n) ncols = j-i; 372 else ncols = n-i; 373 /* Now assemble all these values with a single function call */ 374 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 375 i = j; 376 } 377 } 378 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 379 } 380 381 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 382 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 383 384 /* determine if any processor has disassembled, if so we must 385 also disassemble ourselfs, in order that we may reassemble. */ 386 /* 387 if nonzero structure of submatrix B cannot change then we know that 388 no processor disassembled thus we can skip this stuff 389 */ 390 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 391 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 392 if (mat->was_assembled && !other_disassembled) { 393 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 394 } 395 } 396 397 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 398 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 399 } 400 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 401 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 402 403 if (aij->rowvalues) { 404 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 405 aij->rowvalues = 0; 406 } 407 408 /* used by MatAXPY() */ 409 a->xtoy = 0; b->xtoy = 0; 410 a->XtoY = 0; b->XtoY = 0; 411 412 #if defined(PETSC_HAVE_SUPERLUDIST) 413 ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr); 414 if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); } 415 #endif 416 417 #if defined(PETSC_HAVE_SPOOLES) 418 ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr); 419 if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); } 420 #endif 421 422 #if defined(PETSC_HAVE_MUMPS) 423 ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr); 424 if (flag) { ierr = MatUseMUMPS_MPIAIJ(mat);CHKERRQ(ierr); } 425 #endif 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 431 int MatZeroEntries_MPIAIJ(Mat A) 432 { 433 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 434 int ierr; 435 436 PetscFunctionBegin; 437 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 438 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 439 PetscFunctionReturn(0); 440 } 441 442 #undef __FUNCT__ 443 #define __FUNCT__ "MatZeroRows_MPIAIJ" 444 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag) 445 { 446 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 447 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 448 int *nprocs,j,idx,nsends,row; 449 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 450 int *rvalues,tag = A->tag,count,base,slen,n,*source; 451 int *lens,imdex,*lrows,*values,rstart=l->rstart; 452 MPI_Comm comm = A->comm; 453 MPI_Request *send_waits,*recv_waits; 454 MPI_Status recv_status,*send_status; 455 IS istmp; 456 PetscTruth found; 457 458 PetscFunctionBegin; 459 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 460 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 461 462 /* first count number of contributors to each processor */ 463 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 464 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 465 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 466 for (i=0; i<N; i++) { 467 idx = rows[i]; 468 found = PETSC_FALSE; 469 for (j=0; j<size; j++) { 470 if (idx >= owners[j] && idx < owners[j+1]) { 471 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 472 } 473 } 474 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 475 } 476 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 477 478 /* inform other processors of number of messages and max length*/ 479 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 480 481 /* post receives: */ 482 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 483 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 484 for (i=0; i<nrecvs; i++) { 485 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 486 } 487 488 /* do sends: 489 1) starts[i] gives the starting index in svalues for stuff going to 490 the ith processor 491 */ 492 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 493 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 494 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 495 starts[0] = 0; 496 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 497 for (i=0; i<N; i++) { 498 svalues[starts[owner[i]]++] = rows[i]; 499 } 500 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 501 502 starts[0] = 0; 503 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 504 count = 0; 505 for (i=0; i<size; i++) { 506 if (nprocs[2*i+1]) { 507 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 508 } 509 } 510 ierr = PetscFree(starts);CHKERRQ(ierr); 511 512 base = owners[rank]; 513 514 /* wait on receives */ 515 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 516 source = lens + nrecvs; 517 count = nrecvs; slen = 0; 518 while (count) { 519 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 520 /* unpack receives into our local space */ 521 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 522 source[imdex] = recv_status.MPI_SOURCE; 523 lens[imdex] = n; 524 slen += n; 525 count--; 526 } 527 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 528 529 /* move the data into the send scatter */ 530 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 531 count = 0; 532 for (i=0; i<nrecvs; i++) { 533 values = rvalues + i*nmax; 534 for (j=0; j<lens[i]; j++) { 535 lrows[count++] = values[j] - base; 536 } 537 } 538 ierr = PetscFree(rvalues);CHKERRQ(ierr); 539 ierr = PetscFree(lens);CHKERRQ(ierr); 540 ierr = PetscFree(owner);CHKERRQ(ierr); 541 ierr = PetscFree(nprocs);CHKERRQ(ierr); 542 543 /* actually zap the local rows */ 544 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 545 PetscLogObjectParent(A,istmp); 546 547 /* 548 Zero the required rows. If the "diagonal block" of the matrix 549 is square and the user wishes to set the diagonal we use seperate 550 code so that MatSetValues() is not called for each diagonal allocating 551 new memory, thus calling lots of mallocs and slowing things down. 552 553 Contributed by: Mathew Knepley 554 */ 555 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 556 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 557 if (diag && (l->A->M == l->A->N)) { 558 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 559 } else if (diag) { 560 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 561 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 562 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 563 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 564 } 565 for (i = 0; i < slen; i++) { 566 row = lrows[i] + rstart; 567 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 568 } 569 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 570 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 571 } else { 572 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 573 } 574 ierr = ISDestroy(istmp);CHKERRQ(ierr); 575 ierr = PetscFree(lrows);CHKERRQ(ierr); 576 577 /* wait on sends */ 578 if (nsends) { 579 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 580 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 581 ierr = PetscFree(send_status);CHKERRQ(ierr); 582 } 583 ierr = PetscFree(send_waits);CHKERRQ(ierr); 584 ierr = PetscFree(svalues);CHKERRQ(ierr); 585 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatMult_MPIAIJ" 591 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 592 { 593 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 594 int ierr,nt; 595 596 PetscFunctionBegin; 597 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 598 if (nt != A->n) { 599 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 600 } 601 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 602 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 603 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 604 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 605 PetscFunctionReturn(0); 606 } 607 608 #undef __FUNCT__ 609 #define __FUNCT__ "MatMultAdd_MPIAIJ" 610 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 611 { 612 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 613 int ierr; 614 615 PetscFunctionBegin; 616 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 617 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 618 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 619 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 620 PetscFunctionReturn(0); 621 } 622 623 #undef __FUNCT__ 624 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 625 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 626 { 627 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 628 int ierr; 629 630 PetscFunctionBegin; 631 /* do nondiagonal part */ 632 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 633 /* send it on its way */ 634 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 635 /* do local part */ 636 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 637 /* receive remote parts: note this assumes the values are not actually */ 638 /* inserted in yy until the next line, which is true for my implementation*/ 639 /* but is not perhaps always true. */ 640 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 641 PetscFunctionReturn(0); 642 } 643 644 EXTERN_C_BEGIN 645 #undef __FUNCT__ 646 #define __FUNCT__ "MatIsSymmetric_MPIAIJ" 647 int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f) 648 { 649 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 650 Mat A = Aij->A, B,Aoff = Aij->B,Boff; 651 MatType type; 652 IS Me,Notme; 653 int M,N,first,last,*notme,i, ierr; 654 655 PetscFunctionBegin; 656 657 /* Compatible types */ 658 ierr = MatGetType(Bmat,&type); CHKERRQ(ierr); 659 ierr = PetscStrcmp(type,MATMPIAIJ,f); CHKERRQ(ierr); 660 if (!*f) SETERRQ(1,"Second matrix needs to be MPIAIJ too"); 661 662 /* Easy test: symmetric diagonal block */ 663 Bij = (Mat_MPIAIJ *) Bmat->data; B = Bij->B; 664 ierr = MatIsSymmetric(A,B,f); CHKERRQ(ierr); 665 if (!*f) PetscFunctionReturn(0); 666 667 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 668 ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr); 669 ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr); 670 ierr = PetscMalloc((N-last+first)*sizeof(int),¬me); CHKERRQ(ierr); 671 for (i=0; i<first; i++) notme[i] = i; 672 for (i=last; i<M; i++) notme[i-last+first] = i; 673 ierr = ISCreateGeneral 674 (MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr); 675 ierr = ISCreateStride 676 (MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr); 677 ierr = MatGetSubMatrix 678 (B,Notme,Me,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Boff); CHKERRQ(ierr); 679 ierr = MatIsSymmetric(Aoff,Boff,f); CHKERRQ(ierr); 680 ierr = MatDestroy(Boff); CHKERRQ(ierr); 681 ierr = ISDestroy(Me); CHKERRQ(ierr); 682 ierr = ISDestroy(Notme); CHKERRQ(ierr); 683 684 PetscFunctionReturn(0); 685 } 686 EXTERN_C_END 687 688 #undef __FUNCT__ 689 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 690 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 691 { 692 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 693 int ierr; 694 695 PetscFunctionBegin; 696 /* do nondiagonal part */ 697 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 698 /* send it on its way */ 699 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 700 /* do local part */ 701 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 702 /* receive remote parts: note this assumes the values are not actually */ 703 /* inserted in yy until the next line, which is true for my implementation*/ 704 /* but is not perhaps always true. */ 705 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 706 PetscFunctionReturn(0); 707 } 708 709 /* 710 This only works correctly for square matrices where the subblock A->A is the 711 diagonal block 712 */ 713 #undef __FUNCT__ 714 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 715 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 716 { 717 int ierr; 718 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 719 720 PetscFunctionBegin; 721 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 722 if (a->rstart != a->cstart || a->rend != a->cend) { 723 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 724 } 725 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 726 PetscFunctionReturn(0); 727 } 728 729 #undef __FUNCT__ 730 #define __FUNCT__ "MatScale_MPIAIJ" 731 int MatScale_MPIAIJ(PetscScalar *aa,Mat A) 732 { 733 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 734 int ierr; 735 736 PetscFunctionBegin; 737 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 738 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 739 PetscFunctionReturn(0); 740 } 741 742 #undef __FUNCT__ 743 #define __FUNCT__ "MatDestroy_MPIAIJ" 744 int MatDestroy_MPIAIJ(Mat mat) 745 { 746 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 747 int ierr; 748 749 PetscFunctionBegin; 750 #if defined(PETSC_USE_LOG) 751 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 752 #endif 753 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 754 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 755 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 756 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 757 #if defined (PETSC_USE_CTABLE) 758 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 759 #else 760 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 761 #endif 762 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 763 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 764 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 765 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 766 ierr = PetscFree(aij);CHKERRQ(ierr); 767 PetscFunctionReturn(0); 768 } 769 770 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer); 771 extern int MatFactorInfo_Spooles(Mat,PetscViewer); 772 extern int MatFactorInfo_MUMPS(Mat,PetscViewer); 773 774 #undef __FUNCT__ 775 #define __FUNCT__ "MatView_MPIAIJ_Binary" 776 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 777 { 778 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 779 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 780 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 781 int nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag; 782 int nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 783 PetscScalar *column_values; 784 785 PetscFunctionBegin; 786 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 787 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 788 nz = A->nz + B->nz; 789 if (rank == 0) { 790 header[0] = MAT_FILE_COOKIE; 791 header[1] = mat->M; 792 header[2] = mat->N; 793 ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 794 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 795 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr); 796 /* get largest number of rows any processor has */ 797 rlen = mat->m; 798 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 799 for (i=1; i<size; i++) { 800 rlen = PetscMax(rlen,range[i+1] - range[i]); 801 } 802 } else { 803 ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 804 rlen = mat->m; 805 } 806 807 /* load up the local row counts */ 808 ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr); 809 for (i=0; i<mat->m; i++) { 810 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 811 } 812 813 /* store the row lengths to the file */ 814 if (rank == 0) { 815 MPI_Status status; 816 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr); 817 for (i=1; i<size; i++) { 818 rlen = range[i+1] - range[i]; 819 ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 820 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr); 821 } 822 } else { 823 ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 824 } 825 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 826 827 /* load up the local column indices */ 828 nzmax = nz; /* )th processor needs space a largest processor needs */ 829 ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 830 ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr); 831 cnt = 0; 832 for (i=0; i<mat->m; i++) { 833 for (j=B->i[i]; j<B->i[i+1]; j++) { 834 if ( (col = garray[B->j[j]]) > cstart) break; 835 column_indices[cnt++] = col; 836 } 837 for (k=A->i[i]; k<A->i[i+1]; k++) { 838 column_indices[cnt++] = A->j[k] + cstart; 839 } 840 for (; j<B->i[i+1]; j++) { 841 column_indices[cnt++] = garray[B->j[j]]; 842 } 843 } 844 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 845 846 /* store the column indices to the file */ 847 if (rank == 0) { 848 MPI_Status status; 849 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr); 850 for (i=1; i<size; i++) { 851 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 852 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 853 ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 854 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr); 855 } 856 } else { 857 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 858 ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 859 } 860 ierr = PetscFree(column_indices);CHKERRQ(ierr); 861 862 /* load up the local column values */ 863 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 864 cnt = 0; 865 for (i=0; i<mat->m; i++) { 866 for (j=B->i[i]; j<B->i[i+1]; j++) { 867 if ( garray[B->j[j]] > cstart) break; 868 column_values[cnt++] = B->a[j]; 869 } 870 for (k=A->i[i]; k<A->i[i+1]; k++) { 871 column_values[cnt++] = A->a[k]; 872 } 873 for (; j<B->i[i+1]; j++) { 874 column_values[cnt++] = B->a[j]; 875 } 876 } 877 if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 878 879 /* store the column values to the file */ 880 if (rank == 0) { 881 MPI_Status status; 882 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr); 883 for (i=1; i<size; i++) { 884 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 885 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 886 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 887 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr); 888 } 889 } else { 890 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 891 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 892 } 893 ierr = PetscFree(column_values);CHKERRQ(ierr); 894 PetscFunctionReturn(0); 895 } 896 897 #undef __FUNCT__ 898 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 899 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 900 { 901 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 902 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 903 int ierr,rank = aij->rank,size = aij->size; 904 PetscTruth isdraw,isascii,flg,isbinary; 905 PetscViewer sviewer; 906 PetscViewerFormat format; 907 908 PetscFunctionBegin; 909 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 910 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 911 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 912 if (isascii) { 913 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 914 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 915 MatInfo info; 916 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 917 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 918 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 919 if (flg) { 920 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 921 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 922 } else { 923 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 924 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 925 } 926 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 927 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 928 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 929 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 930 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 931 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 932 PetscFunctionReturn(0); 933 } else if (format == PETSC_VIEWER_ASCII_INFO) { 934 PetscFunctionReturn(0); 935 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 936 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE) 937 ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr); 938 #endif 939 #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE) 940 ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr); 941 #endif 942 #if defined(PETSC_HAVE_MUMPS) && !defined(PETSC_USE_SINGLE) 943 ierr = MatFactorInfo_MUMPS(mat,viewer);CHKERRQ(ierr); 944 #endif 945 PetscFunctionReturn(0); 946 } 947 } else if (isbinary) { 948 if (size == 1) { 949 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 950 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 951 } else { 952 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 953 } 954 PetscFunctionReturn(0); 955 } else if (isdraw) { 956 PetscDraw draw; 957 PetscTruth isnull; 958 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 959 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 960 } 961 962 if (size == 1) { 963 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 964 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 965 } else { 966 /* assemble the entire matrix onto first processor. */ 967 Mat A; 968 Mat_SeqAIJ *Aloc; 969 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 970 PetscScalar *a; 971 972 if (!rank) { 973 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 974 } else { 975 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 976 } 977 PetscLogObjectParent(mat,A); 978 979 /* copy over the A part */ 980 Aloc = (Mat_SeqAIJ*)aij->A->data; 981 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 982 row = aij->rstart; 983 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 984 for (i=0; i<m; i++) { 985 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 986 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 987 } 988 aj = Aloc->j; 989 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 990 991 /* copy over the B part */ 992 Aloc = (Mat_SeqAIJ*)aij->B->data; 993 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 994 row = aij->rstart; 995 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 996 ct = cols; 997 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 998 for (i=0; i<m; i++) { 999 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1000 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1001 } 1002 ierr = PetscFree(ct);CHKERRQ(ierr); 1003 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1004 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1005 /* 1006 Everyone has to call to draw the matrix since the graphics waits are 1007 synchronized across all processors that share the PetscDraw object 1008 */ 1009 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1010 if (!rank) { 1011 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1012 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1013 } 1014 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1015 ierr = MatDestroy(A);CHKERRQ(ierr); 1016 } 1017 PetscFunctionReturn(0); 1018 } 1019 1020 #undef __FUNCT__ 1021 #define __FUNCT__ "MatView_MPIAIJ" 1022 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1023 { 1024 int ierr; 1025 PetscTruth isascii,isdraw,issocket,isbinary; 1026 1027 PetscFunctionBegin; 1028 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 1029 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1030 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1031 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1032 if (isascii || isdraw || isbinary || issocket) { 1033 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1034 } else { 1035 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1036 } 1037 PetscFunctionReturn(0); 1038 } 1039 1040 1041 1042 #undef __FUNCT__ 1043 #define __FUNCT__ "MatRelax_MPIAIJ" 1044 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1045 { 1046 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1047 int ierr; 1048 Vec bb1; 1049 PetscScalar mone=-1.0; 1050 1051 PetscFunctionBegin; 1052 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1053 1054 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1055 1056 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1057 if (flag & SOR_ZERO_INITIAL_GUESS) { 1058 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1059 its--; 1060 } 1061 1062 while (its--) { 1063 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1064 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1065 1066 /* update rhs: bb1 = bb - B*x */ 1067 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1068 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1069 1070 /* local sweep */ 1071 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1072 CHKERRQ(ierr); 1073 } 1074 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1075 if (flag & SOR_ZERO_INITIAL_GUESS) { 1076 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1077 its--; 1078 } 1079 while (its--) { 1080 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1081 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1082 1083 /* update rhs: bb1 = bb - B*x */ 1084 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1085 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1086 1087 /* local sweep */ 1088 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1089 CHKERRQ(ierr); 1090 } 1091 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1092 if (flag & SOR_ZERO_INITIAL_GUESS) { 1093 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1094 its--; 1095 } 1096 while (its--) { 1097 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1098 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1099 1100 /* update rhs: bb1 = bb - B*x */ 1101 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1102 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1103 1104 /* local sweep */ 1105 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1106 CHKERRQ(ierr); 1107 } 1108 } else { 1109 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1110 } 1111 1112 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1113 PetscFunctionReturn(0); 1114 } 1115 1116 #undef __FUNCT__ 1117 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1118 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1119 { 1120 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1121 Mat A = mat->A,B = mat->B; 1122 int ierr; 1123 PetscReal isend[5],irecv[5]; 1124 1125 PetscFunctionBegin; 1126 info->block_size = 1.0; 1127 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1128 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1129 isend[3] = info->memory; isend[4] = info->mallocs; 1130 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1131 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1132 isend[3] += info->memory; isend[4] += info->mallocs; 1133 if (flag == MAT_LOCAL) { 1134 info->nz_used = isend[0]; 1135 info->nz_allocated = isend[1]; 1136 info->nz_unneeded = isend[2]; 1137 info->memory = isend[3]; 1138 info->mallocs = isend[4]; 1139 } else if (flag == MAT_GLOBAL_MAX) { 1140 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1141 info->nz_used = irecv[0]; 1142 info->nz_allocated = irecv[1]; 1143 info->nz_unneeded = irecv[2]; 1144 info->memory = irecv[3]; 1145 info->mallocs = irecv[4]; 1146 } else if (flag == MAT_GLOBAL_SUM) { 1147 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1148 info->nz_used = irecv[0]; 1149 info->nz_allocated = irecv[1]; 1150 info->nz_unneeded = irecv[2]; 1151 info->memory = irecv[3]; 1152 info->mallocs = irecv[4]; 1153 } 1154 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1155 info->fill_ratio_needed = 0; 1156 info->factor_mallocs = 0; 1157 info->rows_global = (double)matin->M; 1158 info->columns_global = (double)matin->N; 1159 info->rows_local = (double)matin->m; 1160 info->columns_local = (double)matin->N; 1161 1162 PetscFunctionReturn(0); 1163 } 1164 1165 #undef __FUNCT__ 1166 #define __FUNCT__ "MatSetOption_MPIAIJ" 1167 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1168 { 1169 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1170 int ierr; 1171 1172 PetscFunctionBegin; 1173 switch (op) { 1174 case MAT_NO_NEW_NONZERO_LOCATIONS: 1175 case MAT_YES_NEW_NONZERO_LOCATIONS: 1176 case MAT_COLUMNS_UNSORTED: 1177 case MAT_COLUMNS_SORTED: 1178 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1179 case MAT_KEEP_ZEROED_ROWS: 1180 case MAT_NEW_NONZERO_LOCATION_ERR: 1181 case MAT_USE_INODES: 1182 case MAT_DO_NOT_USE_INODES: 1183 case MAT_IGNORE_ZERO_ENTRIES: 1184 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1185 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1186 break; 1187 case MAT_ROW_ORIENTED: 1188 a->roworiented = PETSC_TRUE; 1189 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1190 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1191 break; 1192 case MAT_ROWS_SORTED: 1193 case MAT_ROWS_UNSORTED: 1194 case MAT_YES_NEW_DIAGONALS: 1195 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1196 break; 1197 case MAT_COLUMN_ORIENTED: 1198 a->roworiented = PETSC_FALSE; 1199 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1200 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1201 break; 1202 case MAT_IGNORE_OFF_PROC_ENTRIES: 1203 a->donotstash = PETSC_TRUE; 1204 break; 1205 case MAT_NO_NEW_DIAGONALS: 1206 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1207 default: 1208 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1209 } 1210 PetscFunctionReturn(0); 1211 } 1212 1213 #undef __FUNCT__ 1214 #define __FUNCT__ "MatGetRow_MPIAIJ" 1215 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1216 { 1217 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1218 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1219 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1220 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1221 int *cmap,*idx_p; 1222 1223 PetscFunctionBegin; 1224 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1225 mat->getrowactive = PETSC_TRUE; 1226 1227 if (!mat->rowvalues && (idx || v)) { 1228 /* 1229 allocate enough space to hold information from the longest row. 1230 */ 1231 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1232 int max = 1,tmp; 1233 for (i=0; i<matin->m; i++) { 1234 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1235 if (max < tmp) { max = tmp; } 1236 } 1237 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1238 mat->rowindices = (int*)(mat->rowvalues + max); 1239 } 1240 1241 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1242 lrow = row - rstart; 1243 1244 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1245 if (!v) {pvA = 0; pvB = 0;} 1246 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1247 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1248 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1249 nztot = nzA + nzB; 1250 1251 cmap = mat->garray; 1252 if (v || idx) { 1253 if (nztot) { 1254 /* Sort by increasing column numbers, assuming A and B already sorted */ 1255 int imark = -1; 1256 if (v) { 1257 *v = v_p = mat->rowvalues; 1258 for (i=0; i<nzB; i++) { 1259 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1260 else break; 1261 } 1262 imark = i; 1263 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1264 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1265 } 1266 if (idx) { 1267 *idx = idx_p = mat->rowindices; 1268 if (imark > -1) { 1269 for (i=0; i<imark; i++) { 1270 idx_p[i] = cmap[cworkB[i]]; 1271 } 1272 } else { 1273 for (i=0; i<nzB; i++) { 1274 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1275 else break; 1276 } 1277 imark = i; 1278 } 1279 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1280 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1281 } 1282 } else { 1283 if (idx) *idx = 0; 1284 if (v) *v = 0; 1285 } 1286 } 1287 *nz = nztot; 1288 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1289 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1290 PetscFunctionReturn(0); 1291 } 1292 1293 #undef __FUNCT__ 1294 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1295 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1296 { 1297 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1298 1299 PetscFunctionBegin; 1300 if (aij->getrowactive == PETSC_FALSE) { 1301 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1302 } 1303 aij->getrowactive = PETSC_FALSE; 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatNorm_MPIAIJ" 1309 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1310 { 1311 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1312 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1313 int ierr,i,j,cstart = aij->cstart; 1314 PetscReal sum = 0.0; 1315 PetscScalar *v; 1316 1317 PetscFunctionBegin; 1318 if (aij->size == 1) { 1319 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1320 } else { 1321 if (type == NORM_FROBENIUS) { 1322 v = amat->a; 1323 for (i=0; i<amat->nz; i++) { 1324 #if defined(PETSC_USE_COMPLEX) 1325 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1326 #else 1327 sum += (*v)*(*v); v++; 1328 #endif 1329 } 1330 v = bmat->a; 1331 for (i=0; i<bmat->nz; i++) { 1332 #if defined(PETSC_USE_COMPLEX) 1333 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1334 #else 1335 sum += (*v)*(*v); v++; 1336 #endif 1337 } 1338 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1339 *norm = sqrt(*norm); 1340 } else if (type == NORM_1) { /* max column norm */ 1341 PetscReal *tmp,*tmp2; 1342 int *jj,*garray = aij->garray; 1343 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1344 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1345 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1346 *norm = 0.0; 1347 v = amat->a; jj = amat->j; 1348 for (j=0; j<amat->nz; j++) { 1349 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1350 } 1351 v = bmat->a; jj = bmat->j; 1352 for (j=0; j<bmat->nz; j++) { 1353 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1354 } 1355 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1356 for (j=0; j<mat->N; j++) { 1357 if (tmp2[j] > *norm) *norm = tmp2[j]; 1358 } 1359 ierr = PetscFree(tmp);CHKERRQ(ierr); 1360 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1361 } else if (type == NORM_INFINITY) { /* max row norm */ 1362 PetscReal ntemp = 0.0; 1363 for (j=0; j<aij->A->m; j++) { 1364 v = amat->a + amat->i[j]; 1365 sum = 0.0; 1366 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1367 sum += PetscAbsScalar(*v); v++; 1368 } 1369 v = bmat->a + bmat->i[j]; 1370 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1371 sum += PetscAbsScalar(*v); v++; 1372 } 1373 if (sum > ntemp) ntemp = sum; 1374 } 1375 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1376 } else { 1377 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1378 } 1379 } 1380 PetscFunctionReturn(0); 1381 } 1382 1383 #undef __FUNCT__ 1384 #define __FUNCT__ "MatTranspose_MPIAIJ" 1385 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1386 { 1387 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1388 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1389 int ierr; 1390 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1391 Mat B; 1392 PetscScalar *array; 1393 1394 PetscFunctionBegin; 1395 if (!matout && M != N) { 1396 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1397 } 1398 1399 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1400 1401 /* copy over the A part */ 1402 Aloc = (Mat_SeqAIJ*)a->A->data; 1403 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1404 row = a->rstart; 1405 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1406 for (i=0; i<m; i++) { 1407 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1408 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1409 } 1410 aj = Aloc->j; 1411 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1412 1413 /* copy over the B part */ 1414 Aloc = (Mat_SeqAIJ*)a->B->data; 1415 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1416 row = a->rstart; 1417 ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr); 1418 ct = cols; 1419 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1420 for (i=0; i<m; i++) { 1421 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1422 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1423 } 1424 ierr = PetscFree(ct);CHKERRQ(ierr); 1425 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1426 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1427 if (matout) { 1428 *matout = B; 1429 } else { 1430 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1431 } 1432 PetscFunctionReturn(0); 1433 } 1434 1435 #undef __FUNCT__ 1436 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1437 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1438 { 1439 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1440 Mat a = aij->A,b = aij->B; 1441 int ierr,s1,s2,s3; 1442 1443 PetscFunctionBegin; 1444 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1445 if (rr) { 1446 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1447 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1448 /* Overlap communication with computation. */ 1449 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1450 } 1451 if (ll) { 1452 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1453 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1454 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1455 } 1456 /* scale the diagonal block */ 1457 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1458 1459 if (rr) { 1460 /* Do a scatter end and then right scale the off-diagonal block */ 1461 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1462 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1463 } 1464 1465 PetscFunctionReturn(0); 1466 } 1467 1468 1469 #undef __FUNCT__ 1470 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1471 int MatPrintHelp_MPIAIJ(Mat A) 1472 { 1473 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1474 int ierr; 1475 1476 PetscFunctionBegin; 1477 if (!a->rank) { 1478 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1479 } 1480 PetscFunctionReturn(0); 1481 } 1482 1483 #undef __FUNCT__ 1484 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1485 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1486 { 1487 PetscFunctionBegin; 1488 *bs = 1; 1489 PetscFunctionReturn(0); 1490 } 1491 #undef __FUNCT__ 1492 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1493 int MatSetUnfactored_MPIAIJ(Mat A) 1494 { 1495 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1496 int ierr; 1497 1498 PetscFunctionBegin; 1499 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1500 PetscFunctionReturn(0); 1501 } 1502 1503 #undef __FUNCT__ 1504 #define __FUNCT__ "MatEqual_MPIAIJ" 1505 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1506 { 1507 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1508 Mat a,b,c,d; 1509 PetscTruth flg; 1510 int ierr; 1511 1512 PetscFunctionBegin; 1513 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1514 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1515 a = matA->A; b = matA->B; 1516 c = matB->A; d = matB->B; 1517 1518 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1519 if (flg == PETSC_TRUE) { 1520 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1521 } 1522 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1523 PetscFunctionReturn(0); 1524 } 1525 1526 #undef __FUNCT__ 1527 #define __FUNCT__ "MatCopy_MPIAIJ" 1528 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1529 { 1530 int ierr; 1531 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1532 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1533 PetscTruth flg; 1534 1535 PetscFunctionBegin; 1536 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1537 if (str != SAME_NONZERO_PATTERN || !flg) { 1538 /* because of the column compression in the off-processor part of the matrix a->B, 1539 the number of columns in a->B and b->B may be different, hence we cannot call 1540 the MatCopy() directly on the two parts. If need be, we can provide a more 1541 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1542 then copying the submatrices */ 1543 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1544 } else { 1545 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1546 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1547 } 1548 PetscFunctionReturn(0); 1549 } 1550 1551 #undef __FUNCT__ 1552 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1553 int MatSetUpPreallocation_MPIAIJ(Mat A) 1554 { 1555 int ierr; 1556 1557 PetscFunctionBegin; 1558 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1559 PetscFunctionReturn(0); 1560 } 1561 1562 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1563 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1564 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1565 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **); 1566 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1567 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1568 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*); 1569 #endif 1570 1571 #include "petscblaslapack.h" 1572 extern int MatAXPY_SeqAIJ(PetscScalar *,Mat,Mat,MatStructure); 1573 #undef __FUNCT__ 1574 #define __FUNCT__ "MatAXPY_MPIAIJ" 1575 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1576 { 1577 int ierr,one=1,i; 1578 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1579 Mat_SeqAIJ *x,*y; 1580 1581 PetscFunctionBegin; 1582 if (str == SAME_NONZERO_PATTERN) { 1583 x = (Mat_SeqAIJ *)xx->A->data; 1584 y = (Mat_SeqAIJ *)yy->A->data; 1585 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1586 x = (Mat_SeqAIJ *)xx->B->data; 1587 y = (Mat_SeqAIJ *)yy->B->data; 1588 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1589 } else if (str == SUBSET_NONZERO_PATTERN) { 1590 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1591 1592 x = (Mat_SeqAIJ *)xx->B->data; 1593 y = (Mat_SeqAIJ *)yy->B->data; 1594 if (y->xtoy && y->XtoY != xx->B) { 1595 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1596 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1597 } 1598 if (!y->xtoy) { /* get xtoy */ 1599 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1600 y->XtoY = xx->B; 1601 } 1602 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1603 } else { 1604 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1605 } 1606 PetscFunctionReturn(0); 1607 } 1608 1609 /* -------------------------------------------------------------------*/ 1610 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1611 MatGetRow_MPIAIJ, 1612 MatRestoreRow_MPIAIJ, 1613 MatMult_MPIAIJ, 1614 MatMultAdd_MPIAIJ, 1615 MatMultTranspose_MPIAIJ, 1616 MatMultTransposeAdd_MPIAIJ, 1617 0, 1618 0, 1619 0, 1620 0, 1621 0, 1622 0, 1623 MatRelax_MPIAIJ, 1624 MatTranspose_MPIAIJ, 1625 MatGetInfo_MPIAIJ, 1626 MatEqual_MPIAIJ, 1627 MatGetDiagonal_MPIAIJ, 1628 MatDiagonalScale_MPIAIJ, 1629 MatNorm_MPIAIJ, 1630 MatAssemblyBegin_MPIAIJ, 1631 MatAssemblyEnd_MPIAIJ, 1632 0, 1633 MatSetOption_MPIAIJ, 1634 MatZeroEntries_MPIAIJ, 1635 MatZeroRows_MPIAIJ, 1636 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1637 MatLUFactorSymbolic_MPIAIJ_TFS, 1638 #else 1639 0, 1640 #endif 1641 0, 1642 0, 1643 0, 1644 MatSetUpPreallocation_MPIAIJ, 1645 0, 1646 0, 1647 0, 1648 0, 1649 MatDuplicate_MPIAIJ, 1650 0, 1651 0, 1652 0, 1653 0, 1654 MatAXPY_MPIAIJ, 1655 MatGetSubMatrices_MPIAIJ, 1656 MatIncreaseOverlap_MPIAIJ, 1657 MatGetValues_MPIAIJ, 1658 MatCopy_MPIAIJ, 1659 MatPrintHelp_MPIAIJ, 1660 MatScale_MPIAIJ, 1661 0, 1662 0, 1663 0, 1664 MatGetBlockSize_MPIAIJ, 1665 0, 1666 0, 1667 0, 1668 0, 1669 MatFDColoringCreate_MPIAIJ, 1670 0, 1671 MatSetUnfactored_MPIAIJ, 1672 0, 1673 0, 1674 MatGetSubMatrix_MPIAIJ, 1675 MatDestroy_MPIAIJ, 1676 MatView_MPIAIJ, 1677 MatGetPetscMaps_Petsc, 1678 0, 1679 0, 1680 0, 1681 0, 1682 0, 1683 0, 1684 0, 1685 0, 1686 MatSetColoring_MPIAIJ, 1687 MatSetValuesAdic_MPIAIJ, 1688 MatSetValuesAdifor_MPIAIJ 1689 }; 1690 1691 /* ----------------------------------------------------------------------------------------*/ 1692 1693 EXTERN_C_BEGIN 1694 #undef __FUNCT__ 1695 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1696 int MatStoreValues_MPIAIJ(Mat mat) 1697 { 1698 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1699 int ierr; 1700 1701 PetscFunctionBegin; 1702 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1703 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1704 PetscFunctionReturn(0); 1705 } 1706 EXTERN_C_END 1707 1708 EXTERN_C_BEGIN 1709 #undef __FUNCT__ 1710 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1711 int MatRetrieveValues_MPIAIJ(Mat mat) 1712 { 1713 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1714 int ierr; 1715 1716 PetscFunctionBegin; 1717 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1718 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1719 PetscFunctionReturn(0); 1720 } 1721 EXTERN_C_END 1722 1723 #include "petscpc.h" 1724 EXTERN_C_BEGIN 1725 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1726 EXTERN_C_END 1727 1728 EXTERN_C_BEGIN 1729 #undef __FUNCT__ 1730 #define __FUNCT__ "MatCreate_MPIAIJ" 1731 int MatCreate_MPIAIJ(Mat B) 1732 { 1733 Mat_MPIAIJ *b; 1734 int ierr,i,size; 1735 1736 PetscFunctionBegin; 1737 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1738 1739 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1740 B->data = (void*)b; 1741 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1742 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1743 B->factor = 0; 1744 B->assembled = PETSC_FALSE; 1745 B->mapping = 0; 1746 1747 B->insertmode = NOT_SET_VALUES; 1748 b->size = size; 1749 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1750 1751 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1752 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1753 1754 /* the information in the maps duplicates the information computed below, eventually 1755 we should remove the duplicate information that is not contained in the maps */ 1756 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1757 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1758 1759 /* build local table of row and column ownerships */ 1760 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1761 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1762 b->cowners = b->rowners + b->size + 2; 1763 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1764 b->rowners[0] = 0; 1765 for (i=2; i<=b->size; i++) { 1766 b->rowners[i] += b->rowners[i-1]; 1767 } 1768 b->rstart = b->rowners[b->rank]; 1769 b->rend = b->rowners[b->rank+1]; 1770 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1771 b->cowners[0] = 0; 1772 for (i=2; i<=b->size; i++) { 1773 b->cowners[i] += b->cowners[i-1]; 1774 } 1775 b->cstart = b->cowners[b->rank]; 1776 b->cend = b->cowners[b->rank+1]; 1777 1778 /* build cache for off array entries formed */ 1779 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1780 b->donotstash = PETSC_FALSE; 1781 b->colmap = 0; 1782 b->garray = 0; 1783 b->roworiented = PETSC_TRUE; 1784 1785 /* stuff used for matrix vector multiply */ 1786 b->lvec = PETSC_NULL; 1787 b->Mvctx = PETSC_NULL; 1788 1789 /* stuff for MatGetRow() */ 1790 b->rowindices = 0; 1791 b->rowvalues = 0; 1792 b->getrowactive = PETSC_FALSE; 1793 1794 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1795 "MatStoreValues_MPIAIJ", 1796 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1797 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1798 "MatRetrieveValues_MPIAIJ", 1799 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1800 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1801 "MatGetDiagonalBlock_MPIAIJ", 1802 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1803 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C", 1804 "MatIsSymmetric_MPIAIJ", 1805 MatIsSymmetric_MPIAIJ); CHKERRQ(ierr); 1806 PetscFunctionReturn(0); 1807 } 1808 EXTERN_C_END 1809 1810 #undef __FUNCT__ 1811 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1812 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1813 { 1814 Mat mat; 1815 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1816 int ierr; 1817 1818 PetscFunctionBegin; 1819 *newmat = 0; 1820 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1821 ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1822 a = (Mat_MPIAIJ*)mat->data; 1823 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1824 mat->factor = matin->factor; 1825 mat->assembled = PETSC_TRUE; 1826 mat->insertmode = NOT_SET_VALUES; 1827 mat->preallocated = PETSC_TRUE; 1828 1829 a->rstart = oldmat->rstart; 1830 a->rend = oldmat->rend; 1831 a->cstart = oldmat->cstart; 1832 a->cend = oldmat->cend; 1833 a->size = oldmat->size; 1834 a->rank = oldmat->rank; 1835 a->donotstash = oldmat->donotstash; 1836 a->roworiented = oldmat->roworiented; 1837 a->rowindices = 0; 1838 a->rowvalues = 0; 1839 a->getrowactive = PETSC_FALSE; 1840 1841 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1842 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1843 if (oldmat->colmap) { 1844 #if defined (PETSC_USE_CTABLE) 1845 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1846 #else 1847 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1848 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1849 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1850 #endif 1851 } else a->colmap = 0; 1852 if (oldmat->garray) { 1853 int len; 1854 len = oldmat->B->n; 1855 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1856 PetscLogObjectMemory(mat,len*sizeof(int)); 1857 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1858 } else a->garray = 0; 1859 1860 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1861 PetscLogObjectParent(mat,a->lvec); 1862 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1863 PetscLogObjectParent(mat,a->Mvctx); 1864 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1865 PetscLogObjectParent(mat,a->A); 1866 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1867 PetscLogObjectParent(mat,a->B); 1868 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1869 *newmat = mat; 1870 PetscFunctionReturn(0); 1871 } 1872 1873 #include "petscsys.h" 1874 1875 EXTERN_C_BEGIN 1876 #undef __FUNCT__ 1877 #define __FUNCT__ "MatLoad_MPIAIJ" 1878 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat) 1879 { 1880 Mat A; 1881 PetscScalar *vals,*svals; 1882 MPI_Comm comm = ((PetscObject)viewer)->comm; 1883 MPI_Status status; 1884 int i,nz,ierr,j,rstart,rend,fd; 1885 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1886 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1887 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1888 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_MUMPS) 1889 PetscTruth flag; 1890 #endif 1891 1892 PetscFunctionBegin; 1893 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1894 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1895 if (!rank) { 1896 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1897 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1898 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1899 if (header[3] < 0) { 1900 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1901 } 1902 } 1903 1904 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1905 M = header[1]; N = header[2]; 1906 /* determine ownership of all rows */ 1907 m = M/size + ((M % size) > rank); 1908 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1909 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1910 rowners[0] = 0; 1911 for (i=2; i<=size; i++) { 1912 rowners[i] += rowners[i-1]; 1913 } 1914 rstart = rowners[rank]; 1915 rend = rowners[rank+1]; 1916 1917 /* distribute row lengths to all processors */ 1918 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1919 offlens = ourlens + (rend-rstart); 1920 if (!rank) { 1921 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1922 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1923 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1924 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1925 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1926 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1927 } else { 1928 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1929 } 1930 1931 if (!rank) { 1932 /* calculate the number of nonzeros on each processor */ 1933 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1934 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1935 for (i=0; i<size; i++) { 1936 for (j=rowners[i]; j< rowners[i+1]; j++) { 1937 procsnz[i] += rowlengths[j]; 1938 } 1939 } 1940 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1941 1942 /* determine max buffer needed and allocate it */ 1943 maxnz = 0; 1944 for (i=0; i<size; i++) { 1945 maxnz = PetscMax(maxnz,procsnz[i]); 1946 } 1947 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1948 1949 /* read in my part of the matrix column indices */ 1950 nz = procsnz[0]; 1951 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1952 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1953 1954 /* read in every one elses and ship off */ 1955 for (i=1; i<size; i++) { 1956 nz = procsnz[i]; 1957 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1958 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1959 } 1960 ierr = PetscFree(cols);CHKERRQ(ierr); 1961 } else { 1962 /* determine buffer space needed for message */ 1963 nz = 0; 1964 for (i=0; i<m; i++) { 1965 nz += ourlens[i]; 1966 } 1967 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 1968 1969 /* receive message of column indices*/ 1970 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1971 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1972 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1973 } 1974 1975 /* determine column ownership if matrix is not square */ 1976 if (N != M) { 1977 n = N/size + ((N % size) > rank); 1978 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1979 cstart = cend - n; 1980 } else { 1981 cstart = rstart; 1982 cend = rend; 1983 n = cend - cstart; 1984 } 1985 1986 /* loop over local rows, determining number of off diagonal entries */ 1987 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 1988 jj = 0; 1989 for (i=0; i<m; i++) { 1990 for (j=0; j<ourlens[i]; j++) { 1991 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1992 jj++; 1993 } 1994 } 1995 1996 /* create our matrix */ 1997 for (i=0; i<m; i++) { 1998 ourlens[i] -= offlens[i]; 1999 } 2000 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 2001 A = *newmat; 2002 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2003 for (i=0; i<m; i++) { 2004 ourlens[i] += offlens[i]; 2005 } 2006 2007 if (!rank) { 2008 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2009 2010 /* read in my part of the matrix numerical values */ 2011 nz = procsnz[0]; 2012 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2013 2014 /* insert into matrix */ 2015 jj = rstart; 2016 smycols = mycols; 2017 svals = vals; 2018 for (i=0; i<m; i++) { 2019 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2020 smycols += ourlens[i]; 2021 svals += ourlens[i]; 2022 jj++; 2023 } 2024 2025 /* read in other processors and ship out */ 2026 for (i=1; i<size; i++) { 2027 nz = procsnz[i]; 2028 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2029 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2030 } 2031 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2032 } else { 2033 /* receive numeric values */ 2034 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2035 2036 /* receive message of values*/ 2037 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2038 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2039 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2040 2041 /* insert into matrix */ 2042 jj = rstart; 2043 smycols = mycols; 2044 svals = vals; 2045 for (i=0; i<m; i++) { 2046 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2047 smycols += ourlens[i]; 2048 svals += ourlens[i]; 2049 jj++; 2050 } 2051 } 2052 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2053 ierr = PetscFree(vals);CHKERRQ(ierr); 2054 ierr = PetscFree(mycols);CHKERRQ(ierr); 2055 ierr = PetscFree(rowners);CHKERRQ(ierr); 2056 2057 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2058 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2059 #if defined(PETSC_HAVE_SPOOLES) 2060 ierr = PetscOptionsHasName(A->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr); 2061 if (flag) { 2062 if (size == 1) { 2063 ierr = MatUseSpooles_SeqAIJ(A);CHKERRQ(ierr); 2064 } else { 2065 ierr = MatUseSpooles_MPIAIJ(A);CHKERRQ(ierr); 2066 } 2067 } 2068 #endif 2069 #if defined(PETSC_HAVE_SUPERLUDIST) 2070 ierr = PetscOptionsHasName(A->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr); 2071 if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr); } 2072 #endif 2073 #if defined(PETSC_HAVE_MUMPS) 2074 ierr = PetscOptionsHasName(A->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr); 2075 if (flag) { ierr = MatUseMUMPS_MPIAIJ(A);CHKERRQ(ierr); } 2076 #endif 2077 PetscFunctionReturn(0); 2078 } 2079 EXTERN_C_END 2080 2081 #undef __FUNCT__ 2082 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2083 /* 2084 Not great since it makes two copies of the submatrix, first an SeqAIJ 2085 in local and then by concatenating the local matrices the end result. 2086 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2087 */ 2088 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2089 { 2090 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2091 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2092 Mat *local,M,Mreuse; 2093 PetscScalar *vwork,*aa; 2094 MPI_Comm comm = mat->comm; 2095 Mat_SeqAIJ *aij; 2096 2097 2098 PetscFunctionBegin; 2099 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2100 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2101 2102 if (call == MAT_REUSE_MATRIX) { 2103 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2104 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 2105 local = &Mreuse; 2106 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2107 } else { 2108 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2109 Mreuse = *local; 2110 ierr = PetscFree(local);CHKERRQ(ierr); 2111 } 2112 2113 /* 2114 m - number of local rows 2115 n - number of columns (same on all processors) 2116 rstart - first row in new global matrix generated 2117 */ 2118 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2119 if (call == MAT_INITIAL_MATRIX) { 2120 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2121 ii = aij->i; 2122 jj = aij->j; 2123 2124 /* 2125 Determine the number of non-zeros in the diagonal and off-diagonal 2126 portions of the matrix in order to do correct preallocation 2127 */ 2128 2129 /* first get start and end of "diagonal" columns */ 2130 if (csize == PETSC_DECIDE) { 2131 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2132 if (mglobal == n) { /* square matrix */ 2133 nlocal = m; 2134 } else { 2135 nlocal = n/size + ((n % size) > rank); 2136 } 2137 } else { 2138 nlocal = csize; 2139 } 2140 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2141 rstart = rend - nlocal; 2142 if (rank == size - 1 && rend != n) { 2143 SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n); 2144 } 2145 2146 /* next, compute all the lengths */ 2147 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2148 olens = dlens + m; 2149 for (i=0; i<m; i++) { 2150 jend = ii[i+1] - ii[i]; 2151 olen = 0; 2152 dlen = 0; 2153 for (j=0; j<jend; j++) { 2154 if (*jj < rstart || *jj >= rend) olen++; 2155 else dlen++; 2156 jj++; 2157 } 2158 olens[i] = olen; 2159 dlens[i] = dlen; 2160 } 2161 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2162 ierr = PetscFree(dlens);CHKERRQ(ierr); 2163 } else { 2164 int ml,nl; 2165 2166 M = *newmat; 2167 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2168 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2169 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2170 /* 2171 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2172 rather than the slower MatSetValues(). 2173 */ 2174 M->was_assembled = PETSC_TRUE; 2175 M->assembled = PETSC_FALSE; 2176 } 2177 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2178 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2179 ii = aij->i; 2180 jj = aij->j; 2181 aa = aij->a; 2182 for (i=0; i<m; i++) { 2183 row = rstart + i; 2184 nz = ii[i+1] - ii[i]; 2185 cwork = jj; jj += nz; 2186 vwork = aa; aa += nz; 2187 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2188 } 2189 2190 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2191 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2192 *newmat = M; 2193 2194 /* save submatrix used in processor for next request */ 2195 if (call == MAT_INITIAL_MATRIX) { 2196 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2197 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2198 } 2199 2200 PetscFunctionReturn(0); 2201 } 2202 2203 #undef __FUNCT__ 2204 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2205 /*@C 2206 MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format 2207 (the default parallel PETSc format). For good matrix assembly performance 2208 the user should preallocate the matrix storage by setting the parameters 2209 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2210 performance can be increased by more than a factor of 50. 2211 2212 Collective on MPI_Comm 2213 2214 Input Parameters: 2215 + A - the matrix 2216 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2217 (same value is used for all local rows) 2218 . d_nnz - array containing the number of nonzeros in the various rows of the 2219 DIAGONAL portion of the local submatrix (possibly different for each row) 2220 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2221 The size of this array is equal to the number of local rows, i.e 'm'. 2222 You must leave room for the diagonal entry even if it is zero. 2223 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2224 submatrix (same value is used for all local rows). 2225 - o_nnz - array containing the number of nonzeros in the various rows of the 2226 OFF-DIAGONAL portion of the local submatrix (possibly different for 2227 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2228 structure. The size of this array is equal to the number 2229 of local rows, i.e 'm'. 2230 2231 The AIJ format (also called the Yale sparse matrix format or 2232 compressed row storage), is fully compatible with standard Fortran 77 2233 storage. That is, the stored row and column indices can begin at 2234 either one (as in Fortran) or zero. See the users manual for details. 2235 2236 The user MUST specify either the local or global matrix dimensions 2237 (possibly both). 2238 2239 The parallel matrix is partitioned such that the first m0 rows belong to 2240 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2241 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2242 2243 The DIAGONAL portion of the local submatrix of a processor can be defined 2244 as the submatrix which is obtained by extraction the part corresponding 2245 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2246 first row that belongs to the processor, and r2 is the last row belonging 2247 to the this processor. This is a square mxm matrix. The remaining portion 2248 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2249 2250 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2251 2252 By default, this format uses inodes (identical nodes) when possible. 2253 We search for consecutive rows with the same nonzero structure, thereby 2254 reusing matrix information to achieve increased efficiency. 2255 2256 Options Database Keys: 2257 + -mat_aij_no_inode - Do not use inodes 2258 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2259 - -mat_aij_oneindex - Internally use indexing starting at 1 2260 rather than 0. Note that when calling MatSetValues(), 2261 the user still MUST index entries starting at 0! 2262 2263 Example usage: 2264 2265 Consider the following 8x8 matrix with 34 non-zero values, that is 2266 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2267 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2268 as follows: 2269 2270 .vb 2271 1 2 0 | 0 3 0 | 0 4 2272 Proc0 0 5 6 | 7 0 0 | 8 0 2273 9 0 10 | 11 0 0 | 12 0 2274 ------------------------------------- 2275 13 0 14 | 15 16 17 | 0 0 2276 Proc1 0 18 0 | 19 20 21 | 0 0 2277 0 0 0 | 22 23 0 | 24 0 2278 ------------------------------------- 2279 Proc2 25 26 27 | 0 0 28 | 29 0 2280 30 0 0 | 31 32 33 | 0 34 2281 .ve 2282 2283 This can be represented as a collection of submatrices as: 2284 2285 .vb 2286 A B C 2287 D E F 2288 G H I 2289 .ve 2290 2291 Where the submatrices A,B,C are owned by proc0, D,E,F are 2292 owned by proc1, G,H,I are owned by proc2. 2293 2294 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2295 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2296 The 'M','N' parameters are 8,8, and have the same values on all procs. 2297 2298 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2299 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2300 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2301 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2302 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2303 matrix, ans [DF] as another SeqAIJ matrix. 2304 2305 When d_nz, o_nz parameters are specified, d_nz storage elements are 2306 allocated for every row of the local diagonal submatrix, and o_nz 2307 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2308 One way to choose d_nz and o_nz is to use the max nonzerors per local 2309 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2310 In this case, the values of d_nz,o_nz are: 2311 .vb 2312 proc0 : dnz = 2, o_nz = 2 2313 proc1 : dnz = 3, o_nz = 2 2314 proc2 : dnz = 1, o_nz = 4 2315 .ve 2316 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2317 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2318 for proc3. i.e we are using 12+15+10=37 storage locations to store 2319 34 values. 2320 2321 When d_nnz, o_nnz parameters are specified, the storage is specified 2322 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2323 In the above case the values for d_nnz,o_nnz are: 2324 .vb 2325 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2326 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2327 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2328 .ve 2329 Here the space allocated is sum of all the above values i.e 34, and 2330 hence pre-allocation is perfect. 2331 2332 Level: intermediate 2333 2334 .keywords: matrix, aij, compressed row, sparse, parallel 2335 2336 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2337 @*/ 2338 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 2339 { 2340 Mat_MPIAIJ *b; 2341 int ierr,i; 2342 PetscTruth flg2; 2343 2344 PetscFunctionBegin; 2345 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr); 2346 if (!flg2) PetscFunctionReturn(0); 2347 B->preallocated = PETSC_TRUE; 2348 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2349 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2350 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 2351 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 2352 if (d_nnz) { 2353 for (i=0; i<B->m; i++) { 2354 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 2355 } 2356 } 2357 if (o_nnz) { 2358 for (i=0; i<B->m; i++) { 2359 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 2360 } 2361 } 2362 b = (Mat_MPIAIJ*)B->data; 2363 2364 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2365 PetscLogObjectParent(B,b->A); 2366 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2367 PetscLogObjectParent(B,b->B); 2368 2369 PetscFunctionReturn(0); 2370 } 2371 2372 #undef __FUNCT__ 2373 #define __FUNCT__ "MatCreateMPIAIJ" 2374 /*@C 2375 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2376 (the default parallel PETSc format). For good matrix assembly performance 2377 the user should preallocate the matrix storage by setting the parameters 2378 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2379 performance can be increased by more than a factor of 50. 2380 2381 Collective on MPI_Comm 2382 2383 Input Parameters: 2384 + comm - MPI communicator 2385 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2386 This value should be the same as the local size used in creating the 2387 y vector for the matrix-vector product y = Ax. 2388 . n - This value should be the same as the local size used in creating the 2389 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2390 calculated if N is given) For square matrices n is almost always m. 2391 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2392 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2393 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2394 (same value is used for all local rows) 2395 . d_nnz - array containing the number of nonzeros in the various rows of the 2396 DIAGONAL portion of the local submatrix (possibly different for each row) 2397 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2398 The size of this array is equal to the number of local rows, i.e 'm'. 2399 You must leave room for the diagonal entry even if it is zero. 2400 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2401 submatrix (same value is used for all local rows). 2402 - o_nnz - array containing the number of nonzeros in the various rows of the 2403 OFF-DIAGONAL portion of the local submatrix (possibly different for 2404 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2405 structure. The size of this array is equal to the number 2406 of local rows, i.e 'm'. 2407 2408 Output Parameter: 2409 . A - the matrix 2410 2411 Notes: 2412 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2413 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2414 storage requirements for this matrix. 2415 2416 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2417 processor than it must be used on all processors that share the object for 2418 that argument. 2419 2420 The AIJ format (also called the Yale sparse matrix format or 2421 compressed row storage), is fully compatible with standard Fortran 77 2422 storage. That is, the stored row and column indices can begin at 2423 either one (as in Fortran) or zero. See the users manual for details. 2424 2425 The user MUST specify either the local or global matrix dimensions 2426 (possibly both). 2427 2428 The parallel matrix is partitioned such that the first m0 rows belong to 2429 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2430 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2431 2432 The DIAGONAL portion of the local submatrix of a processor can be defined 2433 as the submatrix which is obtained by extraction the part corresponding 2434 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2435 first row that belongs to the processor, and r2 is the last row belonging 2436 to the this processor. This is a square mxm matrix. The remaining portion 2437 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2438 2439 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2440 2441 By default, this format uses inodes (identical nodes) when possible. 2442 We search for consecutive rows with the same nonzero structure, thereby 2443 reusing matrix information to achieve increased efficiency. 2444 2445 Options Database Keys: 2446 + -mat_aij_no_inode - Do not use inodes 2447 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2448 - -mat_aij_oneindex - Internally use indexing starting at 1 2449 rather than 0. Note that when calling MatSetValues(), 2450 the user still MUST index entries starting at 0! 2451 2452 2453 Example usage: 2454 2455 Consider the following 8x8 matrix with 34 non-zero values, that is 2456 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2457 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2458 as follows: 2459 2460 .vb 2461 1 2 0 | 0 3 0 | 0 4 2462 Proc0 0 5 6 | 7 0 0 | 8 0 2463 9 0 10 | 11 0 0 | 12 0 2464 ------------------------------------- 2465 13 0 14 | 15 16 17 | 0 0 2466 Proc1 0 18 0 | 19 20 21 | 0 0 2467 0 0 0 | 22 23 0 | 24 0 2468 ------------------------------------- 2469 Proc2 25 26 27 | 0 0 28 | 29 0 2470 30 0 0 | 31 32 33 | 0 34 2471 .ve 2472 2473 This can be represented as a collection of submatrices as: 2474 2475 .vb 2476 A B C 2477 D E F 2478 G H I 2479 .ve 2480 2481 Where the submatrices A,B,C are owned by proc0, D,E,F are 2482 owned by proc1, G,H,I are owned by proc2. 2483 2484 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2485 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2486 The 'M','N' parameters are 8,8, and have the same values on all procs. 2487 2488 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2489 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2490 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2491 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2492 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2493 matrix, ans [DF] as another SeqAIJ matrix. 2494 2495 When d_nz, o_nz parameters are specified, d_nz storage elements are 2496 allocated for every row of the local diagonal submatrix, and o_nz 2497 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2498 One way to choose d_nz and o_nz is to use the max nonzerors per local 2499 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2500 In this case, the values of d_nz,o_nz are: 2501 .vb 2502 proc0 : dnz = 2, o_nz = 2 2503 proc1 : dnz = 3, o_nz = 2 2504 proc2 : dnz = 1, o_nz = 4 2505 .ve 2506 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2507 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2508 for proc3. i.e we are using 12+15+10=37 storage locations to store 2509 34 values. 2510 2511 When d_nnz, o_nnz parameters are specified, the storage is specified 2512 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2513 In the above case the values for d_nnz,o_nnz are: 2514 .vb 2515 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2516 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2517 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2518 .ve 2519 Here the space allocated is sum of all the above values i.e 34, and 2520 hence pre-allocation is perfect. 2521 2522 Level: intermediate 2523 2524 .keywords: matrix, aij, compressed row, sparse, parallel 2525 2526 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2527 @*/ 2528 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2529 { 2530 int ierr,size; 2531 2532 PetscFunctionBegin; 2533 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2534 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2535 if (size > 1) { 2536 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2537 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2538 } else { 2539 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2540 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2541 } 2542 PetscFunctionReturn(0); 2543 } 2544 2545 #undef __FUNCT__ 2546 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2547 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap) 2548 { 2549 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2550 PetscFunctionBegin; 2551 *Ad = a->A; 2552 *Ao = a->B; 2553 *colmap = a->garray; 2554 PetscFunctionReturn(0); 2555 } 2556 2557 #undef __FUNCT__ 2558 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2559 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2560 { 2561 int ierr,i; 2562 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2563 2564 PetscFunctionBegin; 2565 if (coloring->ctype == IS_COLORING_LOCAL) { 2566 ISColoringValue *allcolors,*colors; 2567 ISColoring ocoloring; 2568 2569 /* set coloring for diagonal portion */ 2570 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2571 2572 /* set coloring for off-diagonal portion */ 2573 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2574 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2575 for (i=0; i<a->B->n; i++) { 2576 colors[i] = allcolors[a->garray[i]]; 2577 } 2578 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2579 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2580 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2581 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2582 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2583 ISColoringValue *colors; 2584 int *larray; 2585 ISColoring ocoloring; 2586 2587 /* set coloring for diagonal portion */ 2588 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2589 for (i=0; i<a->A->n; i++) { 2590 larray[i] = i + a->cstart; 2591 } 2592 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2593 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2594 for (i=0; i<a->A->n; i++) { 2595 colors[i] = coloring->colors[larray[i]]; 2596 } 2597 ierr = PetscFree(larray);CHKERRQ(ierr); 2598 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2599 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2600 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2601 2602 /* set coloring for off-diagonal portion */ 2603 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2604 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2605 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2606 for (i=0; i<a->B->n; i++) { 2607 colors[i] = coloring->colors[larray[i]]; 2608 } 2609 ierr = PetscFree(larray);CHKERRQ(ierr); 2610 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2611 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2612 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2613 } else { 2614 SETERRQ1(1,"No support ISColoringType %d",coloring->ctype); 2615 } 2616 2617 PetscFunctionReturn(0); 2618 } 2619 2620 #undef __FUNCT__ 2621 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2622 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2623 { 2624 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2625 int ierr; 2626 2627 PetscFunctionBegin; 2628 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2629 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2630 PetscFunctionReturn(0); 2631 } 2632 2633 #undef __FUNCT__ 2634 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2635 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2636 { 2637 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2638 int ierr; 2639 2640 PetscFunctionBegin; 2641 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2642 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2643 PetscFunctionReturn(0); 2644 } 2645 2646 #undef __FUNCT__ 2647 #define __FUNCT__ "MatMerge" 2648 /*@C 2649 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2650 matrices from each processor 2651 2652 Collective on MPI_Comm 2653 2654 Input Parameters: 2655 + comm - the communicators the parallel matrix will live on 2656 - inmat - the input sequential matrices 2657 2658 Output Parameter: 2659 . outmat - the parallel matrix generated 2660 2661 Level: advanced 2662 2663 Notes: The number of columns of the matrix in EACH of the seperate files 2664 MUST be the same. 2665 2666 @*/ 2667 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat) 2668 { 2669 int ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz; 2670 PetscScalar *values; 2671 PetscMap columnmap,rowmap; 2672 2673 PetscFunctionBegin; 2674 2675 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr); 2676 2677 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2678 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2679 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr); 2680 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2681 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2682 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2683 2684 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2685 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2686 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2687 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2688 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2689 2690 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2691 for (i=0;i<m;i++) { 2692 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2693 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2694 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2695 } 2696 ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr); 2697 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2698 2699 for (i=0;i<m;i++) { 2700 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2701 I = i + rstart; 2702 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2703 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2704 } 2705 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2706 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2707 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2708 2709 PetscFunctionReturn(0); 2710 } 2711 2712 #undef __FUNCT__ 2713 #define __FUNCT__ "MatFileSplit" 2714 int MatFileSplit(Mat A,char *outfile) 2715 { 2716 int ierr,rank,len,m,N,i,rstart,*indx,nnz; 2717 PetscViewer out; 2718 char *name; 2719 Mat B; 2720 PetscScalar *values; 2721 2722 PetscFunctionBegin; 2723 2724 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2725 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2726 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr); 2727 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2728 for (i=0;i<m;i++) { 2729 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2730 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2731 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2732 } 2733 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2734 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2735 2736 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2737 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2738 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2739 sprintf(name,"%s.%d",outfile,rank); 2740 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr); 2741 ierr = PetscFree(name); 2742 ierr = MatView(B,out);CHKERRQ(ierr); 2743 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2744 ierr = MatDestroy(B);CHKERRQ(ierr); 2745 PetscFunctionReturn(0); 2746 } 2747