1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/ 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/spops.h" 6 7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat); 8 EXTERN int DisAssemble_MPIAIJ(Mat); 9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 12 EXTERN int MatPrintHelp_SeqAIJ(Mat); 13 14 /* 15 Local utility routine that creates a mapping from the global column 16 number to the local number in the off-diagonal part of the local 17 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 18 a slightly higher hash table cost; without it it is not scalable (each processor 19 has an order N integer array but is fast to acess. 20 */ 21 #undef __FUNCT__ 22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 23 int CreateColmap_MPIAIJ_Private(Mat mat) 24 { 25 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 26 int n = aij->B->n,i,ierr; 27 28 PetscFunctionBegin; 29 #if defined (PETSC_USE_CTABLE) 30 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 31 for (i=0; i<n; i++){ 32 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 33 } 34 #else 35 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 36 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 37 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 38 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 39 #endif 40 PetscFunctionReturn(0); 41 } 42 43 #define CHUNKSIZE 15 44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 45 { \ 46 \ 47 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 48 rmax = aimax[row]; nrow = ailen[row]; \ 49 col1 = col - shift; \ 50 \ 51 low = 0; high = nrow; \ 52 while (high-low > 5) { \ 53 t = (low+high)/2; \ 54 if (rp[t] > col) high = t; \ 55 else low = t; \ 56 } \ 57 for (_i=low; _i<high; _i++) { \ 58 if (rp[_i] > col1) break; \ 59 if (rp[_i] == col1) { \ 60 if (addv == ADD_VALUES) ap[_i] += value; \ 61 else ap[_i] = value; \ 62 goto a_noinsert; \ 63 } \ 64 } \ 65 if (nonew == 1) goto a_noinsert; \ 66 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 67 if (nrow >= rmax) { \ 68 /* there is no extra room in row, therefore enlarge */ \ 69 int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 70 PetscScalar *new_a; \ 71 \ 72 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 73 \ 74 /* malloc new storage space */ \ 75 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \ 76 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 77 new_j = (int*)(new_a + new_nz); \ 78 new_i = new_j + new_nz; \ 79 \ 80 /* copy over old data into new slots */ \ 81 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 82 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 83 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 84 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 85 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 86 len*sizeof(int));CHKERRQ(ierr); \ 87 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 88 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 89 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 90 /* free up old matrix storage */ \ 91 \ 92 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 93 if 
(!a->singlemalloc) { \ 94 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 95 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 96 } \ 97 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 98 a->singlemalloc = PETSC_TRUE; \ 99 \ 100 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 101 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 102 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 103 a->maxnz += CHUNKSIZE; \ 104 a->reallocs++; \ 105 } \ 106 N = nrow++ - 1; a->nz++; \ 107 /* shift up all the later entries in this row */ \ 108 for (ii=N; ii>=_i; ii--) { \ 109 rp[ii+1] = rp[ii]; \ 110 ap[ii+1] = ap[ii]; \ 111 } \ 112 rp[_i] = col1; \ 113 ap[_i] = value; \ 114 a_noinsert: ; \ 115 ailen[row] = nrow; \ 116 } 117 118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 119 { \ 120 \ 121 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 122 rmax = bimax[row]; nrow = bilen[row]; \ 123 col1 = col - shift; \ 124 \ 125 low = 0; high = nrow; \ 126 while (high-low > 5) { \ 127 t = (low+high)/2; \ 128 if (rp[t] > col) high = t; \ 129 else low = t; \ 130 } \ 131 for (_i=low; _i<high; _i++) { \ 132 if (rp[_i] > col1) break; \ 133 if (rp[_i] == col1) { \ 134 if (addv == ADD_VALUES) ap[_i] += value; \ 135 else ap[_i] = value; \ 136 goto b_noinsert; \ 137 } \ 138 } \ 139 if (nonew == 1) goto b_noinsert; \ 140 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 141 if (nrow >= rmax) { \ 142 /* there is no extra room in row, therefore enlarge */ \ 143 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 144 PetscScalar *new_a; \ 145 \ 146 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 147 \ 148 /* malloc new storage space */ \ 149 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 150 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 151 new_j = (int*)(new_a + new_nz); \ 152 new_i = new_j + new_nz; \ 153 \ 154 /* copy over old data into new slots */ \ 155 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 156 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 157 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 158 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 159 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 160 len*sizeof(int));CHKERRQ(ierr); \ 161 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 163 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 164 /* free up old matrix storage */ \ 165 \ 166 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 167 if (!b->singlemalloc) { \ 168 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 169 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 170 } \ 171 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 172 b->singlemalloc = PETSC_TRUE; \ 173 \ 174 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 175 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 176 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 177 b->maxnz += CHUNKSIZE; \ 178 b->reallocs++; \ 179 } \ 180 N = nrow++ - 1; b->nz++; \ 181 /* shift up all the later entries in this row */ \ 182 for (ii=N; ii>=_i; ii--) { \ 183 rp[ii+1] = rp[ii]; \ 184 ap[ii+1] = ap[ii]; \ 185 } \ 186 rp[_i] = col1; \ 187 ap[_i] = value; \ 188 b_noinsert: ; \ 189 bilen[row] = nrow; \ 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatSetValues_MPIAIJ" 194 int MatSetValues_MPIAIJ(Mat 
mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 195 { 196 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 197 PetscScalar value; 198 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 199 int cstart = aij->cstart,cend = aij->cend,row,col; 200 PetscTruth roworiented = aij->roworiented; 201 202 /* Some Variables required in the macro */ 203 Mat A = aij->A; 204 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 205 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 206 PetscScalar *aa = a->a; 207 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 208 Mat B = aij->B; 209 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 210 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 211 PetscScalar *ba = b->a; 212 213 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 214 int nonew = a->nonew,shift = a->indexshift; 215 PetscScalar *ap; 216 217 PetscFunctionBegin; 218 for (i=0; i<m; i++) { 219 if (im[i] < 0) continue; 220 #if defined(PETSC_USE_BOPT_g) 221 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 222 #endif 223 if (im[i] >= rstart && im[i] < rend) { 224 row = im[i] - rstart; 225 for (j=0; j<n; j++) { 226 if (in[j] >= cstart && in[j] < cend){ 227 col = in[j] - cstart; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 if (ignorezeroentries && value == 0.0) continue; 230 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } else if (in[j] < 0) continue; 233 #if defined(PETSC_USE_BOPT_g) 234 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");} 235 #endif 236 else { 237 if (mat->was_assembled) { 238 if (!aij->colmap) { 239 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 240 } 241 #if defined (PETSC_USE_CTABLE) 242 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 243 col--; 244 #else 245 col = aij->colmap[in[j]] - 1; 246 #endif 247 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 248 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 249 col = in[j]; 250 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 251 B = aij->B; 252 b = (Mat_SeqAIJ*)B->data; 253 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 254 ba = b->a; 255 } 256 } else col = in[j]; 257 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 258 if (ignorezeroentries && value == 0.0) continue; 259 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 260 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 261 } 262 } 263 } else { 264 if (!aij->donotstash) { 265 if (roworiented) { 266 if (ignorezeroentries && v[i*n] == 0.0) continue; 267 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 268 } else { 269 if (ignorezeroentries && v[i] == 0.0) continue; 270 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 271 } 272 } 273 } 274 } 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatGetValues_MPIAIJ" 280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 281 { 282 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 283 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 284 int cstart = aij->cstart,cend = aij->cend,row,col; 285 286 PetscFunctionBegin; 287 for (i=0; i<m; i++) { 288 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 289 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 
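  /*
     Illustrative sketch (added for clarity, not compiled): how a global column
     number is translated to a local column number of the off-diagonal block B
     using the colmap built by CreateColmap_MPIAIJ_Private() at the top of this
     file.  The names gcol/lcol are hypothetical; the real lookups appear in
     MatSetValues_MPIAIJ() above and MatGetValues_MPIAIJ() below.
  */
#if 0
  {
    int gcol = /* some global column number */ 0,lcol;
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
    lcol--;                          /* the table stores indices shifted by one */
#else
    lcol = aij->colmap[gcol] - 1;    /* a stored 0 means "column not present"   */
#endif
    if (lcol < 0) {
      /* gcol does not (yet) occur in the off-diagonal part B on this processor */
    }
  }
#endif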
290 if (idxm[i] >= rstart && idxm[i] < rend) { 291 row = idxm[i] - rstart; 292 for (j=0; j<n; j++) { 293 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 294 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 295 if (idxn[j] >= cstart && idxn[j] < cend){ 296 col = idxn[j] - cstart; 297 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 298 } else { 299 if (!aij->colmap) { 300 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 301 } 302 #if defined (PETSC_USE_CTABLE) 303 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 304 col --; 305 #else 306 col = aij->colmap[idxn[j]] - 1; 307 #endif 308 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 309 else { 310 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 311 } 312 } 313 } 314 } else { 315 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 316 } 317 } 318 PetscFunctionReturn(0); 319 } 320 321 #undef __FUNCT__ 322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 324 { 325 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 326 int ierr,nstash,reallocs; 327 InsertMode addv; 328 329 PetscFunctionBegin; 330 if (aij->donotstash) { 331 PetscFunctionReturn(0); 332 } 333 334 /* make sure all processors are either in INSERTMODE or ADDMODE */ 335 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 336 if (addv == (ADD_VALUES|INSERT_VALUES)) { 337 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 338 } 339 mat->insertmode = addv; /* in case this processor had no cache */ 340 341 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 342 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 343 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 344 PetscFunctionReturn(0); 345 } 346 347 348 #undef __FUNCT__ 349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 351 { 352 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 353 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 354 int i,j,rstart,ncols,n,ierr,flg; 355 int *row,*col,other_disassembled; 356 PetscScalar *val; 357 InsertMode addv = mat->insertmode; 358 #if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_MUMPS) 359 PetscTruth flag; 360 #endif 361 362 PetscFunctionBegin; 363 if (!aij->donotstash) { 364 while (1) { 365 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 366 if (!flg) break; 367 368 for (i=0; i<n;) { 369 /* Now identify the consecutive vals belonging to the same row */ 370 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 371 if (j < n) ncols = j-i; 372 else ncols = n-i; 373 /* Now assemble all these values with a single function call */ 374 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 375 i = j; 376 } 377 } 378 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 379 } 380 381 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 382 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 383 384 /* determine if any processor has disassembled, if so we must 385 also disassemble ourselfs, in order that we may reassemble. 
*/ 386 /* 387 if nonzero structure of submatrix B cannot change then we know that 388 no processor disassembled thus we can skip this stuff 389 */ 390 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 391 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 392 if (mat->was_assembled && !other_disassembled) { 393 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 394 } 395 } 396 397 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 398 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 399 } 400 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 401 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 402 403 if (aij->rowvalues) { 404 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 405 aij->rowvalues = 0; 406 } 407 408 /* used by MatAXPY() */ 409 a->xtoy = 0; b->xtoy = 0; 410 a->XtoY = 0; b->XtoY = 0; 411 412 #if defined(PETSC_HAVE_SUPERLUDIST) 413 ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr); 414 if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); } 415 #endif 416 417 #if defined(PETSC_HAVE_SPOOLES) 418 ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr); 419 if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); } 420 #endif 421 422 #if defined(PETSC_HAVE_MUMPS) 423 ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr); 424 if (flag) { ierr = MatUseMUMPS_MPIAIJ(mat);CHKERRQ(ierr); } 425 #endif 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 431 int MatZeroEntries_MPIAIJ(Mat A) 432 { 433 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 434 int ierr; 435 436 PetscFunctionBegin; 437 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 438 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 439 PetscFunctionReturn(0); 440 } 441 442 #undef __FUNCT__ 443 #define __FUNCT__ "MatZeroRows_MPIAIJ" 444 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag) 445 { 446 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 447 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 448 int *nprocs,j,idx,nsends,row; 449 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 450 int *rvalues,tag = A->tag,count,base,slen,n,*source; 451 int *lens,imdex,*lrows,*values,rstart=l->rstart; 452 MPI_Comm comm = A->comm; 453 MPI_Request *send_waits,*recv_waits; 454 MPI_Status recv_status,*send_status; 455 IS istmp; 456 PetscTruth found; 457 458 PetscFunctionBegin; 459 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 460 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 461 462 /* first count number of contributors to each processor */ 463 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 464 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 465 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 466 for (i=0; i<N; i++) { 467 idx = rows[i]; 468 found = PETSC_FALSE; 469 for (j=0; j<size; j++) { 470 if (idx >= owners[j] && idx < owners[j+1]) { 471 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 472 } 473 } 474 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 475 } 476 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 477 478 /* inform other processors of number of messages and max length*/ 479 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 480 481 /* post receives: */ 482 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 483 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 484 for (i=0; i<nrecvs; 
i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry, allocating
     new memory, thus calling lots of mallocs and slowing things down.
552 553 Contributed by: Mathew Knepley 554 */ 555 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 556 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 557 if (diag && (l->A->M == l->A->N)) { 558 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 559 } else if (diag) { 560 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 561 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 562 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 563 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 564 } 565 for (i = 0; i < slen; i++) { 566 row = lrows[i] + rstart; 567 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 568 } 569 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 570 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 571 } else { 572 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 573 } 574 ierr = ISDestroy(istmp);CHKERRQ(ierr); 575 ierr = PetscFree(lrows);CHKERRQ(ierr); 576 577 /* wait on sends */ 578 if (nsends) { 579 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 580 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 581 ierr = PetscFree(send_status);CHKERRQ(ierr); 582 } 583 ierr = PetscFree(send_waits);CHKERRQ(ierr); 584 ierr = PetscFree(svalues);CHKERRQ(ierr); 585 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatMult_MPIAIJ" 591 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 592 { 593 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 594 int ierr,nt; 595 596 PetscFunctionBegin; 597 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 598 if (nt != A->n) { 599 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 600 } 601 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 602 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 603 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 604 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 605 PetscFunctionReturn(0); 606 } 607 608 #undef __FUNCT__ 609 #define __FUNCT__ "MatMultAdd_MPIAIJ" 610 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 611 { 612 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 613 int ierr; 614 615 PetscFunctionBegin; 616 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 617 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 618 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 619 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 620 PetscFunctionReturn(0); 621 } 622 623 #undef __FUNCT__ 624 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 625 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 626 { 627 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 628 int ierr; 629 630 PetscFunctionBegin; 631 /* do nondiagonal part */ 632 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 633 /* send it on its way */ 634 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 635 /* do local part */ 636 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 637 /* receive remote parts: note this assumes the values are not actually */ 638 /* inserted in yy until the next line, which is true for my implementation*/ 639 /* but is not perhaps always true. 
*/ 640 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 641 PetscFunctionReturn(0); 642 } 643 644 EXTERN_C_BEGIN 645 #undef __FUNCT__ 646 #define __FUNCT__ "MatIsSymmetric_MPIAIJ" 647 int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f) 648 { 649 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 650 Mat A = Aij->A, B,Aoff = Aij->B,Boff; 651 MatType type; 652 IS Me,Notme; 653 int M,N,first,last,*notme,i, ierr; 654 655 PetscFunctionBegin; 656 657 /* Compatible types */ 658 ierr = MatGetType(Bmat,&type); CHKERRQ(ierr); 659 ierr = PetscStrcmp(type,MATMPIAIJ,f); CHKERRQ(ierr); 660 if (!*f) SETERRQ(1,"Second matrix needs to be MPIAIJ too"); 661 662 /* Easy test: symmetric diagonal block */ 663 Bij = (Mat_MPIAIJ *) Bmat->data; B = Bij->B; 664 ierr = MatIsSymmetric(A,B,f); CHKERRQ(ierr); 665 if (!*f) PetscFunctionReturn(0); 666 667 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 668 ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr); 669 ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr); 670 ierr = PetscMalloc((N-last+first)*sizeof(int),¬me); CHKERRQ(ierr); 671 for (i=0; i<first; i++) notme[i] = i; 672 for (i=last; i<M; i++) notme[i-last+first] = i; 673 ierr = ISCreateGeneral 674 (MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr); 675 ierr = ISCreateStride 676 (MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr); 677 ierr = MatGetSubMatrix 678 (B,Notme,Me,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Boff); CHKERRQ(ierr); 679 ierr = MatIsSymmetric(Aoff,Boff,f); CHKERRQ(ierr); 680 ierr = MatDestroy(Boff); CHKERRQ(ierr); 681 ierr = ISDestroy(Me); CHKERRQ(ierr); 682 ierr = ISDestroy(Notme); CHKERRQ(ierr); 683 684 PetscFunctionReturn(0); 685 } 686 EXTERN_C_END 687 688 #undef __FUNCT__ 689 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 690 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 691 { 692 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 693 int ierr; 694 695 PetscFunctionBegin; 696 /* do nondiagonal part */ 697 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 698 /* send it on its way */ 699 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 700 /* do local part */ 701 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 702 /* receive remote parts: note this assumes the values are not actually */ 703 /* inserted in yy until the next line, which is true for my implementation*/ 704 /* but is not perhaps always true. 
*/ 705 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 706 PetscFunctionReturn(0); 707 } 708 709 /* 710 This only works correctly for square matrices where the subblock A->A is the 711 diagonal block 712 */ 713 #undef __FUNCT__ 714 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 715 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 716 { 717 int ierr; 718 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 719 720 PetscFunctionBegin; 721 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 722 if (a->rstart != a->cstart || a->rend != a->cend) { 723 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 724 } 725 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 726 PetscFunctionReturn(0); 727 } 728 729 #undef __FUNCT__ 730 #define __FUNCT__ "MatScale_MPIAIJ" 731 int MatScale_MPIAIJ(PetscScalar *aa,Mat A) 732 { 733 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 734 int ierr; 735 736 PetscFunctionBegin; 737 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 738 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 739 PetscFunctionReturn(0); 740 } 741 742 #undef __FUNCT__ 743 #define __FUNCT__ "MatDestroy_MPIAIJ" 744 int MatDestroy_MPIAIJ(Mat mat) 745 { 746 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 747 int ierr; 748 749 PetscFunctionBegin; 750 #if defined(PETSC_USE_LOG) 751 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 752 #endif 753 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 754 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 755 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 756 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 757 #if defined (PETSC_USE_CTABLE) 758 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 759 #else 760 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 761 #endif 762 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 763 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 764 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 765 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 766 ierr = PetscFree(aij);CHKERRQ(ierr); 767 PetscFunctionReturn(0); 768 } 769 770 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer); 771 extern int MatFactorInfo_Spooles(Mat,PetscViewer); 772 773 #undef __FUNCT__ 774 #define __FUNCT__ "MatView_MPIAIJ_Binary" 775 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 776 { 777 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 778 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 779 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 780 int nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag; 781 int nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 782 PetscScalar *column_values; 783 784 PetscFunctionBegin; 785 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 786 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 787 nz = A->nz + B->nz; 788 if (rank == 0) { 789 header[0] = MAT_FILE_COOKIE; 790 header[1] = mat->M; 791 header[2] = mat->N; 792 ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 793 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 794 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr); 795 /* get largest number of rows any processor has */ 796 rlen = mat->m; 797 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 798 for (i=1; i<size; i++) { 799 rlen = PetscMax(rlen,range[i+1] - range[i]); 800 } 801 } else { 802 ierr = 
MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs space as large as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr =
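  /*
     Summary (added for clarity) of the binary layout produced by this routine;
     rank 0 does all of the writing and the other processors ship their pieces
     to it with MPI_Send:

        header[4]      = MAT_FILE_COOKIE, M (global rows), N (global columns),
                         total number of nonzeros (MPI_SUM reduction of nz)
        row lengths    = one int per global row, in ownership order
        column indices = global column numbers of every nonzero, row by row,
                         with diagonal-block entries merged into the
                         off-diagonal ones so each row is in increasing order
        values         = the matching numerical values, in the same order
  */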
MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 891 } 892 ierr = PetscFree(column_values);CHKERRQ(ierr); 893 PetscFunctionReturn(0); 894 } 895 896 #undef __FUNCT__ 897 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 898 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 899 { 900 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 901 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 902 int ierr,shift = C->indexshift,rank = aij->rank,size = aij->size; 903 PetscTruth isdraw,isascii,flg,isbinary; 904 PetscViewer sviewer; 905 PetscViewerFormat format; 906 907 PetscFunctionBegin; 908 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 909 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 910 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 911 if (isascii) { 912 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 913 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 914 MatInfo info; 915 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 916 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 917 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 918 if (flg) { 919 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 920 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 921 } else { 922 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 923 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 924 } 925 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 926 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 927 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 928 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 929 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 930 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 931 PetscFunctionReturn(0); 932 } else if (format == PETSC_VIEWER_ASCII_INFO) { 933 PetscFunctionReturn(0); 934 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 935 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE) 936 ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr); 937 #endif 938 #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE) 939 ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr); 940 #endif 941 942 PetscFunctionReturn(0); 943 } 944 } else if (isbinary) { 945 if (size == 1) { 946 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 947 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 948 } else { 949 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 950 } 951 PetscFunctionReturn(0); 952 } else if (isdraw) { 953 PetscDraw draw; 954 PetscTruth isnull; 955 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 956 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 957 } 958 959 if (size == 1) { 960 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 961 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 962 } else { 963 /* assemble the entire matrix onto first processor. 
*/ 964 Mat A; 965 Mat_SeqAIJ *Aloc; 966 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 967 PetscScalar *a; 968 969 if (!rank) { 970 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 971 } else { 972 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 973 } 974 PetscLogObjectParent(mat,A); 975 976 /* copy over the A part */ 977 Aloc = (Mat_SeqAIJ*)aij->A->data; 978 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 979 row = aij->rstart; 980 for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;} 981 for (i=0; i<m; i++) { 982 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 983 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 984 } 985 aj = Aloc->j; 986 for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;} 987 988 /* copy over the B part */ 989 Aloc = (Mat_SeqAIJ*)aij->B->data; 990 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 991 row = aij->rstart; 992 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 993 ct = cols; 994 for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];} 995 for (i=0; i<m; i++) { 996 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 997 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 998 } 999 ierr = PetscFree(ct);CHKERRQ(ierr); 1000 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1001 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1002 /* 1003 Everyone has to call to draw the matrix since the graphics waits are 1004 synchronized across all processors that share the PetscDraw object 1005 */ 1006 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1007 if (!rank) { 1008 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1009 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1010 } 1011 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1012 ierr = MatDestroy(A);CHKERRQ(ierr); 1013 } 1014 PetscFunctionReturn(0); 1015 } 1016 1017 #undef __FUNCT__ 1018 #define __FUNCT__ "MatView_MPIAIJ" 1019 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1020 { 1021 int ierr; 1022 PetscTruth isascii,isdraw,issocket,isbinary; 1023 1024 PetscFunctionBegin; 1025 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 1026 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1027 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1028 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1029 if (isascii || isdraw || isbinary || issocket) { 1030 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1031 } else { 1032 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1033 } 1034 PetscFunctionReturn(0); 1035 } 1036 1037 1038 1039 #undef __FUNCT__ 1040 #define __FUNCT__ "MatRelax_MPIAIJ" 1041 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1042 { 1043 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1044 int ierr; 1045 Vec bb1; 1046 PetscScalar mone=-1.0; 1047 1048 PetscFunctionBegin; 1049 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1050 1051 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1052 1053 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == 
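  /*
     Note (added for clarity) on the "local" relaxation below: with the ghost
     values of xx gathered into mat->lvec, each outer iteration forms the
     modified right-hand side

         bb1 = bb - B*lvec

     and then runs lits sweeps of SOR on the diagonal block A alone, so the
     off-processor coupling is handled in block-Jacobi fashion while SOR is
     applied within each processor's block.
  */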
SOR_LOCAL_SYMMETRIC_SWEEP){ 1054 if (flag & SOR_ZERO_INITIAL_GUESS) { 1055 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1056 its--; 1057 } 1058 1059 while (its--) { 1060 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1061 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1062 1063 /* update rhs: bb1 = bb - B*x */ 1064 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1065 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1066 1067 /* local sweep */ 1068 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1069 CHKERRQ(ierr); 1070 } 1071 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1072 if (flag & SOR_ZERO_INITIAL_GUESS) { 1073 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1074 its--; 1075 } 1076 while (its--) { 1077 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1078 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1079 1080 /* update rhs: bb1 = bb - B*x */ 1081 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1082 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1083 1084 /* local sweep */ 1085 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1086 CHKERRQ(ierr); 1087 } 1088 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1089 if (flag & SOR_ZERO_INITIAL_GUESS) { 1090 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1091 its--; 1092 } 1093 while (its--) { 1094 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1095 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1096 1097 /* update rhs: bb1 = bb - B*x */ 1098 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1099 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1100 1101 /* local sweep */ 1102 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1103 CHKERRQ(ierr); 1104 } 1105 } else { 1106 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1107 } 1108 1109 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1110 PetscFunctionReturn(0); 1111 } 1112 1113 #undef __FUNCT__ 1114 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1115 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1116 { 1117 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1118 Mat A = mat->A,B = mat->B; 1119 int ierr; 1120 PetscReal isend[5],irecv[5]; 1121 1122 PetscFunctionBegin; 1123 info->block_size = 1.0; 1124 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1125 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1126 isend[3] = info->memory; isend[4] = info->mallocs; 1127 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1128 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1129 isend[3] += info->memory; isend[4] += info->mallocs; 1130 if (flag == MAT_LOCAL) { 1131 info->nz_used = isend[0]; 1132 info->nz_allocated = isend[1]; 1133 info->nz_unneeded = isend[2]; 1134 info->memory = isend[3]; 1135 info->mallocs = isend[4]; 1136 } else if (flag == MAT_GLOBAL_MAX) { 1137 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1138 info->nz_used = irecv[0]; 1139 info->nz_allocated = irecv[1]; 1140 info->nz_unneeded = irecv[2]; 1141 
info->memory = irecv[3]; 1142 info->mallocs = irecv[4]; 1143 } else if (flag == MAT_GLOBAL_SUM) { 1144 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1145 info->nz_used = irecv[0]; 1146 info->nz_allocated = irecv[1]; 1147 info->nz_unneeded = irecv[2]; 1148 info->memory = irecv[3]; 1149 info->mallocs = irecv[4]; 1150 } 1151 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1152 info->fill_ratio_needed = 0; 1153 info->factor_mallocs = 0; 1154 info->rows_global = (double)matin->M; 1155 info->columns_global = (double)matin->N; 1156 info->rows_local = (double)matin->m; 1157 info->columns_local = (double)matin->N; 1158 1159 PetscFunctionReturn(0); 1160 } 1161 1162 #undef __FUNCT__ 1163 #define __FUNCT__ "MatSetOption_MPIAIJ" 1164 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1165 { 1166 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1167 int ierr; 1168 1169 PetscFunctionBegin; 1170 switch (op) { 1171 case MAT_NO_NEW_NONZERO_LOCATIONS: 1172 case MAT_YES_NEW_NONZERO_LOCATIONS: 1173 case MAT_COLUMNS_UNSORTED: 1174 case MAT_COLUMNS_SORTED: 1175 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1176 case MAT_KEEP_ZEROED_ROWS: 1177 case MAT_NEW_NONZERO_LOCATION_ERR: 1178 case MAT_USE_INODES: 1179 case MAT_DO_NOT_USE_INODES: 1180 case MAT_IGNORE_ZERO_ENTRIES: 1181 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1182 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1183 break; 1184 case MAT_ROW_ORIENTED: 1185 a->roworiented = PETSC_TRUE; 1186 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1187 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1188 break; 1189 case MAT_ROWS_SORTED: 1190 case MAT_ROWS_UNSORTED: 1191 case MAT_YES_NEW_DIAGONALS: 1192 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1193 break; 1194 case MAT_COLUMN_ORIENTED: 1195 a->roworiented = PETSC_FALSE; 1196 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1197 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1198 break; 1199 case MAT_IGNORE_OFF_PROC_ENTRIES: 1200 a->donotstash = PETSC_TRUE; 1201 break; 1202 case MAT_NO_NEW_DIAGONALS: 1203 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1204 default: 1205 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1206 } 1207 PetscFunctionReturn(0); 1208 } 1209 1210 #undef __FUNCT__ 1211 #define __FUNCT__ "MatGetRow_MPIAIJ" 1212 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1213 { 1214 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1215 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1216 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1217 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1218 int *cmap,*idx_p; 1219 1220 PetscFunctionBegin; 1221 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1222 mat->getrowactive = PETSC_TRUE; 1223 1224 if (!mat->rowvalues && (idx || v)) { 1225 /* 1226 allocate enough space to hold information from the longest row. 
1227 */ 1228 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1229 int max = 1,tmp; 1230 for (i=0; i<matin->m; i++) { 1231 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1232 if (max < tmp) { max = tmp; } 1233 } 1234 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1235 mat->rowindices = (int*)(mat->rowvalues + max); 1236 } 1237 1238 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1239 lrow = row - rstart; 1240 1241 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1242 if (!v) {pvA = 0; pvB = 0;} 1243 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1244 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1245 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1246 nztot = nzA + nzB; 1247 1248 cmap = mat->garray; 1249 if (v || idx) { 1250 if (nztot) { 1251 /* Sort by increasing column numbers, assuming A and B already sorted */ 1252 int imark = -1; 1253 if (v) { 1254 *v = v_p = mat->rowvalues; 1255 for (i=0; i<nzB; i++) { 1256 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1257 else break; 1258 } 1259 imark = i; 1260 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1261 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1262 } 1263 if (idx) { 1264 *idx = idx_p = mat->rowindices; 1265 if (imark > -1) { 1266 for (i=0; i<imark; i++) { 1267 idx_p[i] = cmap[cworkB[i]]; 1268 } 1269 } else { 1270 for (i=0; i<nzB; i++) { 1271 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1272 else break; 1273 } 1274 imark = i; 1275 } 1276 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1277 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1278 } 1279 } else { 1280 if (idx) *idx = 0; 1281 if (v) *v = 0; 1282 } 1283 } 1284 *nz = nztot; 1285 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1286 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1287 PetscFunctionReturn(0); 1288 } 1289 1290 #undef __FUNCT__ 1291 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1292 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1293 { 1294 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1295 1296 PetscFunctionBegin; 1297 if (aij->getrowactive == PETSC_FALSE) { 1298 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1299 } 1300 aij->getrowactive = PETSC_FALSE; 1301 PetscFunctionReturn(0); 1302 } 1303 1304 #undef __FUNCT__ 1305 #define __FUNCT__ "MatNorm_MPIAIJ" 1306 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1307 { 1308 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1309 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1310 int ierr,i,j,cstart = aij->cstart,shift = amat->indexshift; 1311 PetscReal sum = 0.0; 1312 PetscScalar *v; 1313 1314 PetscFunctionBegin; 1315 if (aij->size == 1) { 1316 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1317 } else { 1318 if (type == NORM_FROBENIUS) { 1319 v = amat->a; 1320 for (i=0; i<amat->nz; i++) { 1321 #if defined(PETSC_USE_COMPLEX) 1322 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1323 #else 1324 sum += (*v)*(*v); v++; 1325 #endif 1326 } 1327 v = bmat->a; 1328 for (i=0; i<bmat->nz; i++) { 1329 #if defined(PETSC_USE_COMPLEX) 1330 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1331 #else 1332 sum += (*v)*(*v); v++; 1333 #endif 1334 } 1335 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1336 *norm = sqrt(*norm); 1337 } else if (type == NORM_1) { /* max column norm */ 1338 
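      /*
         NORM_1 of the global matrix (added note): for each global column j
         accumulate the local contribution sum_i |a_ij| (diagonal-block entries
         at cstart+col, off-diagonal entries through garray[]), MPI_SUM the
         length-N arrays across the communicator, and take the largest column
         sum.
      */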
PetscReal *tmp,*tmp2; 1339 int *jj,*garray = aij->garray; 1340 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1341 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1342 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1343 *norm = 0.0; 1344 v = amat->a; jj = amat->j; 1345 for (j=0; j<amat->nz; j++) { 1346 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1347 } 1348 v = bmat->a; jj = bmat->j; 1349 for (j=0; j<bmat->nz; j++) { 1350 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1351 } 1352 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1353 for (j=0; j<mat->N; j++) { 1354 if (tmp2[j] > *norm) *norm = tmp2[j]; 1355 } 1356 ierr = PetscFree(tmp);CHKERRQ(ierr); 1357 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1358 } else if (type == NORM_INFINITY) { /* max row norm */ 1359 PetscReal ntemp = 0.0; 1360 for (j=0; j<aij->A->m; j++) { 1361 v = amat->a + amat->i[j] + shift; 1362 sum = 0.0; 1363 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1364 sum += PetscAbsScalar(*v); v++; 1365 } 1366 v = bmat->a + bmat->i[j] + shift; 1367 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1368 sum += PetscAbsScalar(*v); v++; 1369 } 1370 if (sum > ntemp) ntemp = sum; 1371 } 1372 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1373 } else { 1374 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1375 } 1376 } 1377 PetscFunctionReturn(0); 1378 } 1379 1380 #undef __FUNCT__ 1381 #define __FUNCT__ "MatTranspose_MPIAIJ" 1382 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1383 { 1384 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1385 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1386 int ierr,shift = Aloc->indexshift; 1387 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1388 Mat B; 1389 PetscScalar *array; 1390 1391 PetscFunctionBegin; 1392 if (!matout && M != N) { 1393 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1394 } 1395 1396 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1397 1398 /* copy over the A part */ 1399 Aloc = (Mat_SeqAIJ*)a->A->data; 1400 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1401 row = a->rstart; 1402 for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;} 1403 for (i=0; i<m; i++) { 1404 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1405 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1406 } 1407 aj = Aloc->j; 1408 for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;} 1409 1410 /* copy over the B part */ 1411 Aloc = (Mat_SeqAIJ*)a->B->data; 1412 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1413 row = a->rstart; 1414 ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr); 1415 ct = cols; 1416 for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];} 1417 for (i=0; i<m; i++) { 1418 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1419 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1420 } 1421 ierr = PetscFree(ct);CHKERRQ(ierr); 1422 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1423 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1424 if (matout) { 1425 *matout = B; 1426 } else { 1427 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1428 } 1429 PetscFunctionReturn(0); 1430 } 1431 1432 #undef __FUNCT__ 1433 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1434 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1435 { 1436 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1437 Mat a = aij->A,b 
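  /*
     Added note: this computes diag(ll)*mat*diag(rr).  Left scaling by ll acts
     row-wise and is purely local, so it is applied directly to both blocks;
     right scaling of the off-diagonal block needs the ghost entries of rr,
     which is why the scatter of rr into aij->lvec is started first and only
     completed after the local work, overlapping communication with
     computation.
  */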
= aij->B; 1438 int ierr,s1,s2,s3; 1439 1440 PetscFunctionBegin; 1441 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1442 if (rr) { 1443 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1444 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1445 /* Overlap communication with computation. */ 1446 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1447 } 1448 if (ll) { 1449 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1450 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1451 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1452 } 1453 /* scale the diagonal block */ 1454 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1455 1456 if (rr) { 1457 /* Do a scatter end and then right scale the off-diagonal block */ 1458 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1459 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1460 } 1461 1462 PetscFunctionReturn(0); 1463 } 1464 1465 1466 #undef __FUNCT__ 1467 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1468 int MatPrintHelp_MPIAIJ(Mat A) 1469 { 1470 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1471 int ierr; 1472 1473 PetscFunctionBegin; 1474 if (!a->rank) { 1475 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1476 } 1477 PetscFunctionReturn(0); 1478 } 1479 1480 #undef __FUNCT__ 1481 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1482 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1483 { 1484 PetscFunctionBegin; 1485 *bs = 1; 1486 PetscFunctionReturn(0); 1487 } 1488 #undef __FUNCT__ 1489 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1490 int MatSetUnfactored_MPIAIJ(Mat A) 1491 { 1492 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1493 int ierr; 1494 1495 PetscFunctionBegin; 1496 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1497 PetscFunctionReturn(0); 1498 } 1499 1500 #undef __FUNCT__ 1501 #define __FUNCT__ "MatEqual_MPIAIJ" 1502 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1503 { 1504 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1505 Mat a,b,c,d; 1506 PetscTruth flg; 1507 int ierr; 1508 1509 PetscFunctionBegin; 1510 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1511 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1512 a = matA->A; b = matA->B; 1513 c = matB->A; d = matB->B; 1514 1515 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1516 if (flg == PETSC_TRUE) { 1517 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1518 } 1519 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1520 PetscFunctionReturn(0); 1521 } 1522 1523 #undef __FUNCT__ 1524 #define __FUNCT__ "MatCopy_MPIAIJ" 1525 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1526 { 1527 int ierr; 1528 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1529 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1530 PetscTruth flg; 1531 1532 PetscFunctionBegin; 1533 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1534 if (str != SAME_NONZERO_PATTERN || !flg) { 1535 /* because of the column compression in the off-processor part of the matrix a->B, 1536 the number of columns in a->B and b->B may be different, hence we cannot call 1537 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1538 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1539 then copying the submatrices */ 1540 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1541 } else { 1542 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1543 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1544 } 1545 PetscFunctionReturn(0); 1546 } 1547 1548 #undef __FUNCT__ 1549 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1550 int MatSetUpPreallocation_MPIAIJ(Mat A) 1551 { 1552 int ierr; 1553 1554 PetscFunctionBegin; 1555 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1556 PetscFunctionReturn(0); 1557 } 1558 1559 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1560 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1561 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1562 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **); 1563 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1564 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1565 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*); 1566 #endif 1567 1568 #include "petscblaslapack.h" 1569 extern int MatAXPY_SeqAIJ(PetscScalar *,Mat,Mat,MatStructure); 1570 #undef __FUNCT__ 1571 #define __FUNCT__ "MatAXPY_MPIAIJ" 1572 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1573 { 1574 int ierr,one=1,i; 1575 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1576 Mat_SeqAIJ *x,*y; 1577 1578 PetscFunctionBegin; 1579 if (str == SAME_NONZERO_PATTERN) { 1580 x = (Mat_SeqAIJ *)xx->A->data; 1581 y = (Mat_SeqAIJ *)yy->A->data; 1582 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1583 x = (Mat_SeqAIJ *)xx->B->data; 1584 y = (Mat_SeqAIJ *)yy->B->data; 1585 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1586 } else if (str == SUBSET_NONZERO_PATTERN) { 1587 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1588 1589 x = (Mat_SeqAIJ *)xx->B->data; 1590 y = (Mat_SeqAIJ *)yy->B->data; 1591 if (y->xtoy && y->XtoY != xx->B) { 1592 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1593 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1594 } 1595 if (!y->xtoy) { /* get xtoy */ 1596 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1597 y->XtoY = xx->B; 1598 } 1599 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1600 } else { 1601 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1602 } 1603 PetscFunctionReturn(0); 1604 } 1605 1606 /* -------------------------------------------------------------------*/ 1607 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1608 MatGetRow_MPIAIJ, 1609 MatRestoreRow_MPIAIJ, 1610 MatMult_MPIAIJ, 1611 MatMultAdd_MPIAIJ, 1612 MatMultTranspose_MPIAIJ, 1613 MatMultTransposeAdd_MPIAIJ, 1614 0, 1615 0, 1616 0, 1617 0, 1618 0, 1619 0, 1620 MatRelax_MPIAIJ, 1621 MatTranspose_MPIAIJ, 1622 MatGetInfo_MPIAIJ, 1623 MatEqual_MPIAIJ, 1624 MatGetDiagonal_MPIAIJ, 1625 MatDiagonalScale_MPIAIJ, 1626 MatNorm_MPIAIJ, 1627 MatAssemblyBegin_MPIAIJ, 1628 MatAssemblyEnd_MPIAIJ, 1629 0, 1630 MatSetOption_MPIAIJ, 1631 MatZeroEntries_MPIAIJ, 1632 MatZeroRows_MPIAIJ, 1633 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1634 MatLUFactorSymbolic_MPIAIJ_TFS, 1635 #else 1636 0, 1637 #endif 1638 0, 1639 0, 1640 0, 1641 MatSetUpPreallocation_MPIAIJ, 1642 0, 1643 0, 1644 0, 1645 0, 1646 MatDuplicate_MPIAIJ, 1647 0, 1648 0, 1649 0, 1650 0, 1651 MatAXPY_MPIAIJ, 1652 MatGetSubMatrices_MPIAIJ, 1653 
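                                       /*
                                          Added note: the entries of this table must stay in the
                                          positional order of struct _MatOps; generic routines
                                          dispatch through it, e.g. MatMult(mat,x,y) ends up calling
                                          (*mat->ops->mult)(mat,x,y), which for this type is
                                          MatMult_MPIAIJ() above.  The table is copied into each new
                                          matrix in MatCreate_MPIAIJ() below.
                                       */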
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
       MatGetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
       MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       0,
       0,
       MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       MatGetPetscMaps_Petsc,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatSetColoring_MPIAIJ,
       MatSetValuesAdic_MPIAIJ,
       MatSetValuesAdifor_MPIAIJ
};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
int MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
int MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#include "petscpc.h"
EXTERN_C_BEGIN
EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
int MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  int        ierr,i,size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr          = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data       = (void*)b;
  ierr          = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;
  B->mapping    = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C",
                                           "MatIsSymmetric_MPIAIJ",
                                           MatIsSymmetric_MPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  int        ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    int len;
    len  = oldmat->B->n;
    ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,len*sizeof(int));
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->B);
  ierr    = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  PetscScalar  *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          i,nz,ierr,j,rstart,rend,fd;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
#if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_MUMPS)
  PetscTruth   flag;
#endif

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscOptionsHasName(A->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
  if (flag) {
    if (size == 1) {
      ierr = MatUseSpooles_SeqAIJ(A);CHKERRQ(ierr);
    } else {
      ierr = MatUseSpooles_MPIAIJ(A);CHKERRQ(ierr);
    }
  }
#endif
#if defined(PETSC_HAVE_SUPERLUDIST)
  ierr = PetscOptionsHasName(A->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr); }
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscOptionsHasName(A->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseMUMPS_MPIAIJ(A);CHKERRQ(ierr); }
#endif
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ copy on each
  process, and then the final result formed by concatenating those local matrices.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int         ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  int         *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat         *local,M,Mreuse;
  PetscScalar *vwork,*aa;
  MPI_Comm    comm = mat->comm;
  Mat_SeqAIJ  *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
    }

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion of
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e., 34, and
   hence the pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
{
  Mat_MPIAIJ *b;
  int        ierr,i;
  PetscTruth flg2;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
  if (!flg2) PetscFunctionReturn(0);
  B->preallocated = PETSC_TRUE;
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<B->m; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->m; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->A);
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->B);

  PetscFunctionReturn(0);
}
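/*
   A minimal usage sketch (not part of the original source), assuming the 8x8 example
   described in the manual page above is assembled on 3 processes; it shows roughly
   how the process that owns rows 3-5 (proc1) could create the matrix and preallocate
   with per-row counts.  The matrix name, the d_nnz/o_nnz values, and the omitted
   MatSetValues() loop are illustrative only.

      Mat A;
      int ierr;
      int d_nnz[3] = {3,3,2};     per-row nonzeros in the DIAGONAL block [E]
      int o_nnz[3] = {2,1,1};     per-row nonzeros in the OFF-DIAGONAL block [DF]

      ierr = MatCreate(PETSC_COMM_WORLD,3,3,8,8,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
      (set entries for the locally owned rows with MatSetValues(), then assemble)
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/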
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion of
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e., 34, and
   hence the pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  int        ierr,i;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_LOCAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    int             *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      larray[i] = i + a->cstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@C
   MatMerge - Creates a single large PETSc matrix by concatenating the sequential
              matrices from each processor

   Collective on MPI_Comm

   Input Parameters:
+  comm - the communicator the parallel matrix will live on
-  inmat - the input sequential matrices

   Output Parameter:
.  outmat - the parallel matrix generated

   Level: advanced

   Notes: The number of columns of the matrix in EACH of the separate matrices
          MUST be the same.

@*/
int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
{
  int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
  PetscScalar *values;
  PetscMap    columnmap,rowmap;

  PetscFunctionBegin;

  ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);

  /* count nonzeros in each row, for the diagonal and off-diagonal portions of the matrix */
  ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
  ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
  ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
  ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
  ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);

  ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
  ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
  ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
  ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
  ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  for (i=0;i<m;i++) {
    ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
    ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  for (i=0;i<m;i++) {
    ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    I    = i + rstart;
    ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatDestroy(inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

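/*
   A minimal usage sketch for MatMerge() (not part of the original source): every
   process builds a SeqAIJ matrix with the SAME number of columns, and the pieces are
   stacked into one parallel matrix; note that MatMerge() destroys the input matrix.
   The variable names and the local row count mlocal below are illustrative only.

      Mat seqmat,parmat;
      int ierr;
      ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,N,0,0,&seqmat);CHKERRQ(ierr);
      (fill seqmat with MatSetValues() and assemble it)
      ierr = MatMerge(PETSC_COMM_WORLD,seqmat,&parmat);CHKERRQ(ierr);
*/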
#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
int MatFileSplit(Mat A,char *outfile)
{
  int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
  PetscViewer out;
  char        *name;
  Mat         B;
  PetscScalar *values;

  PetscFunctionBegin;

  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
  for (i=0;i<m;i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
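/*
   A minimal usage sketch for loading a matrix of this type from a PETSc binary file
   (not part of the original source).  The file name is illustrative, and the
   PETSC_BINARY_RDONLY open mode is an assumption about the binary viewer API of this
   PETSc version.

      Mat         A;
      PetscViewer viewer;
      int         ierr;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_BINARY_RDONLY,&viewer);CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/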