1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/ 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/spops.h" 6 7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat); 8 EXTERN int DisAssemble_MPIAIJ(Mat); 9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 12 EXTERN int MatPrintHelp_SeqAIJ(Mat); 13 14 /* 15 Local utility routine that creates a mapping from the global column 16 number to the local number in the off-diagonal part of the local 17 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 18 a slightly higher hash table cost; without it it is not scalable (each processor 19 has an order N integer array but is fast to acess. 20 */ 21 #undef __FUNCT__ 22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 23 int CreateColmap_MPIAIJ_Private(Mat mat) 24 { 25 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 26 int n = aij->B->n,i,ierr; 27 28 PetscFunctionBegin; 29 #if defined (PETSC_USE_CTABLE) 30 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 31 for (i=0; i<n; i++){ 32 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 33 } 34 #else 35 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 36 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 37 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 38 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 39 #endif 40 PetscFunctionReturn(0); 41 } 42 43 #define CHUNKSIZE 15 44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 45 { \ 46 \ 47 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 48 rmax = aimax[row]; nrow = ailen[row]; \ 49 col1 = col - shift; \ 50 \ 51 low = 0; high = nrow; \ 52 while (high-low > 5) { \ 53 t = (low+high)/2; \ 54 if (rp[t] > col) high = t; \ 55 else low = t; \ 56 } \ 57 for (_i=low; _i<high; _i++) { \ 58 if (rp[_i] > col1) break; \ 59 if (rp[_i] == col1) { \ 60 if (addv == ADD_VALUES) ap[_i] += value; \ 61 else ap[_i] = value; \ 62 goto a_noinsert; \ 63 } \ 64 } \ 65 if (nonew == 1) goto a_noinsert; \ 66 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 67 if (nrow >= rmax) { \ 68 /* there is no extra room in row, therefore enlarge */ \ 69 int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 70 PetscScalar *new_a; \ 71 \ 72 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 73 \ 74 /* malloc new storage space */ \ 75 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \ 76 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 77 new_j = (int*)(new_a + new_nz); \ 78 new_i = new_j + new_nz; \ 79 \ 80 /* copy over old data into new slots */ \ 81 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 82 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 83 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 84 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 85 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 86 len*sizeof(int));CHKERRQ(ierr); \ 87 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 88 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 89 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 90 /* free up old matrix storage */ \ 91 \ 92 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 93 if (!a->singlemalloc) { \ 94 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 95 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 96 } \ 97 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 98 a->singlemalloc = PETSC_TRUE; \ 99 \ 100 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 101 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 102 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 103 a->maxnz += CHUNKSIZE; \ 104 a->reallocs++; \ 105 } \ 106 N = nrow++ - 1; a->nz++; \ 107 /* shift up all the later entries in this row */ \ 108 for (ii=N; ii>=_i; ii--) { \ 109 rp[ii+1] = rp[ii]; \ 110 ap[ii+1] = ap[ii]; \ 111 } \ 112 rp[_i] = col1; \ 113 ap[_i] = value; \ 114 a_noinsert: ; \ 115 ailen[row] = nrow; \ 116 } 117 118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 119 { \ 120 \ 121 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 122 rmax = bimax[row]; nrow = bilen[row]; \ 123 col1 = col - shift; \ 124 \ 125 low = 0; high = nrow; \ 126 while (high-low > 5) { \ 127 t = (low+high)/2; \ 128 if (rp[t] > col) high = t; \ 129 else low = t; \ 130 } \ 131 for (_i=low; _i<high; _i++) { \ 132 if (rp[_i] > col1) break; \ 133 if (rp[_i] == col1) { \ 134 if (addv == ADD_VALUES) ap[_i] += value; \ 135 else ap[_i] = value; \ 136 goto b_noinsert; \ 137 } \ 138 } \ 139 if (nonew == 1) goto b_noinsert; \ 140 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 141 if (nrow >= rmax) { \ 142 /* there is no extra room in row, therefore enlarge */ \ 143 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 144 PetscScalar *new_a; \ 145 \ 146 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 147 \ 148 /* malloc new storage space */ \ 149 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 150 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 151 new_j = (int*)(new_a + new_nz); \ 152 new_i = new_j + new_nz; \ 153 \ 154 /* copy over old data into new slots */ \ 155 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 156 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 157 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 158 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 159 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 160 len*sizeof(int));CHKERRQ(ierr); \ 161 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 163 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 164 /* free up old matrix storage */ \ 165 \ 166 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 167 if (!b->singlemalloc) { \ 168 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 169 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 170 } \ 171 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 172 b->singlemalloc = PETSC_TRUE; \ 173 \ 174 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 175 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 176 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 177 b->maxnz += CHUNKSIZE; \ 178 b->reallocs++; \ 179 } \ 180 N = nrow++ - 1; b->nz++; \ 181 /* shift up all the later entries in this row */ \ 182 for (ii=N; ii>=_i; ii--) { \ 183 rp[ii+1] = rp[ii]; \ 184 ap[ii+1] = ap[ii]; \ 185 } \ 186 rp[_i] = col1; \ 187 ap[_i] = value; \ 188 b_noinsert: ; \ 189 bilen[row] = nrow; \ 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatSetValues_MPIAIJ" 194 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 195 { 196 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 197 PetscScalar value; 198 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 199 int cstart = aij->cstart,cend = aij->cend,row,col; 200 PetscTruth roworiented = aij->roworiented; 201 202 /* Some Variables required in the macro */ 203 Mat A = aij->A; 204 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 205 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 206 PetscScalar *aa = a->a; 207 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 208 Mat B = aij->B; 209 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 210 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 211 PetscScalar *ba = b->a; 212 213 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 214 int nonew = a->nonew,shift = a->indexshift; 215 PetscScalar *ap; 216 217 PetscFunctionBegin; 218 for (i=0; i<m; i++) { 219 if (im[i] < 0) continue; 220 #if defined(PETSC_USE_BOPT_g) 221 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 222 #endif 223 if (im[i] >= rstart && im[i] < rend) { 224 row = im[i] - rstart; 225 for (j=0; j<n; j++) { 226 if (in[j] >= cstart && in[j] < cend){ 227 col = in[j] - cstart; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 if (ignorezeroentries && value == 0.0) continue; 230 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } else if (in[j] < 0) continue; 233 #if defined(PETSC_USE_BOPT_g) 234 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");} 235 #endif 236 else { 237 if (mat->was_assembled) { 238 if (!aij->colmap) { 239 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 240 } 241 #if defined (PETSC_USE_CTABLE) 242 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 243 col--; 244 #else 245 col = aij->colmap[in[j]] - 1; 246 #endif 247 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 248 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 249 col = in[j]; 250 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 251 B = aij->B; 252 b = (Mat_SeqAIJ*)B->data; 253 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 254 ba = b->a; 255 } 256 } else col = in[j]; 257 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 258 if (ignorezeroentries && value == 0.0) continue; 259 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 260 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 261 } 262 } 263 } else { 264 if (!aij->donotstash) { 265 if (roworiented) { 266 if (ignorezeroentries && v[i*n] == 0.0) continue; 267 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 268 } else { 269 if (ignorezeroentries && v[i] == 0.0) continue; 270 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 271 } 272 } 273 } 274 } 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatGetValues_MPIAIJ" 280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 281 { 282 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 283 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 284 int cstart = aij->cstart,cend = aij->cend,row,col; 285 286 PetscFunctionBegin; 287 for (i=0; i<m; i++) { 288 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 289 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 290 if (idxm[i] >= rstart && idxm[i] < rend) { 291 row = idxm[i] - rstart; 292 for (j=0; j<n; j++) { 293 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 294 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 295 if (idxn[j] >= cstart && idxn[j] < cend){ 296 col = idxn[j] - cstart; 297 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 298 } else { 299 if (!aij->colmap) { 300 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 301 } 302 #if defined (PETSC_USE_CTABLE) 303 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 304 col --; 305 #else 306 col = aij->colmap[idxn[j]] - 1; 307 #endif 308 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 309 else { 310 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 311 } 312 } 313 } 314 } else { 315 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 316 } 317 } 318 PetscFunctionReturn(0); 319 } 320 321 #undef __FUNCT__ 322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 324 { 325 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 326 int ierr,nstash,reallocs; 327 InsertMode addv; 328 329 PetscFunctionBegin; 330 if (aij->donotstash) { 331 PetscFunctionReturn(0); 332 } 333 334 /* make sure all processors are either in INSERTMODE or ADDMODE */ 335 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 336 if (addv == (ADD_VALUES|INSERT_VALUES)) { 337 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 338 } 339 mat->insertmode = addv; /* in case this processor had no cache */ 340 341 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 342 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 343 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 344 PetscFunctionReturn(0); 345 } 346 347 348 #undef __FUNCT__ 349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 351 { 352 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 353 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 354 int i,j,rstart,ncols,n,ierr,flg; 355 int *row,*col,other_disassembled; 356 PetscScalar *val; 357 InsertMode addv = mat->insertmode; 358 #if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES) 359 PetscTruth flag; 360 #endif 361 362 PetscFunctionBegin; 363 if (!aij->donotstash) { 364 while (1) { 365 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 366 if (!flg) break; 367 368 for (i=0; i<n;) { 369 /* Now identify the consecutive vals belonging to the same row */ 370 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 371 if (j < n) ncols = j-i; 372 else ncols = n-i; 373 /* Now assemble all these values with a single function call */ 374 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 375 i = j; 376 } 377 } 378 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 379 } 380 381 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 382 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 383 384 /* determine if any processor has disassembled, if so we must 385 also disassemble ourselfs, in order that we may reassemble. */ 386 /* 387 if nonzero structure of submatrix B cannot change then we know that 388 no processor disassembled thus we can skip this stuff 389 */ 390 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 391 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 392 if (mat->was_assembled && !other_disassembled) { 393 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 394 } 395 } 396 397 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 398 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 399 } 400 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 401 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 402 403 if (aij->rowvalues) { 404 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 405 aij->rowvalues = 0; 406 } 407 408 /* used by MatAXPY() */ 409 a->xtoy = 0; b->xtoy = 0; 410 a->XtoY = 0; b->XtoY = 0; 411 412 #if defined(PETSC_HAVE_SUPERLUDIST) 413 ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr); 414 if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); } 415 #endif 416 417 #if defined(PETSC_HAVE_SPOOLES) 418 ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr); 419 if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); } 420 #endif 421 PetscFunctionReturn(0); 422 } 423 424 #undef __FUNCT__ 425 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 426 int MatZeroEntries_MPIAIJ(Mat A) 427 { 428 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 429 int ierr; 430 431 PetscFunctionBegin; 432 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 433 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 434 PetscFunctionReturn(0); 435 } 436 437 #undef __FUNCT__ 438 #define __FUNCT__ "MatZeroRows_MPIAIJ" 439 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag) 440 { 441 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 442 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 443 int *nprocs,j,idx,nsends,row; 444 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 445 int *rvalues,tag = A->tag,count,base,slen,n,*source; 446 int *lens,imdex,*lrows,*values,rstart=l->rstart; 447 MPI_Comm comm = A->comm; 448 MPI_Request *send_waits,*recv_waits; 449 MPI_Status recv_status,*send_status; 450 IS istmp; 451 PetscTruth found; 452 453 PetscFunctionBegin; 454 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 455 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 456 457 /* first count number of contributors to each processor */ 458 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 459 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 460 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 461 for (i=0; i<N; i++) { 462 idx = rows[i]; 463 found = PETSC_FALSE; 464 for (j=0; j<size; j++) { 465 if (idx >= owners[j] && idx < owners[j+1]) { 466 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 467 } 468 } 469 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 470 } 471 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 472 473 /* inform other processors of number of messages and max length*/ 474 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 475 476 /* post receives: */ 477 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 478 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 479 for (i=0; i<nrecvs; i++) { 480 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 481 } 482 483 /* do sends: 484 1) starts[i] gives the starting index in svalues for stuff going to 485 the ith processor 486 */ 487 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 488 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 489 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 490 starts[0] = 0; 491 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 492 for (i=0; i<N; i++) { 493 svalues[starts[owner[i]]++] = rows[i]; 494 } 495 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 496 497 starts[0] = 0; 498 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 499 count = 0; 500 for (i=0; i<size; i++) { 501 if (nprocs[2*i+1]) { 502 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 503 } 504 } 505 ierr = PetscFree(starts);CHKERRQ(ierr); 506 507 base = owners[rank]; 508 509 /* wait on receives */ 510 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 511 source = lens + nrecvs; 512 count = nrecvs; slen = 0; 513 while (count) { 514 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 515 /* unpack receives into our local space */ 516 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 517 source[imdex] = recv_status.MPI_SOURCE; 518 lens[imdex] = n; 519 slen += n; 520 count--; 521 } 522 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 523 524 /* move the data into the send scatter */ 525 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 526 count = 0; 527 for (i=0; i<nrecvs; i++) { 528 values = rvalues + i*nmax; 529 for (j=0; j<lens[i]; j++) { 530 lrows[count++] = values[j] - base; 531 } 532 } 533 ierr = PetscFree(rvalues);CHKERRQ(ierr); 534 ierr = PetscFree(lens);CHKERRQ(ierr); 535 ierr = PetscFree(owner);CHKERRQ(ierr); 536 ierr = PetscFree(nprocs);CHKERRQ(ierr); 537 538 /* actually zap the local rows */ 539 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 540 PetscLogObjectParent(A,istmp); 541 542 /* 543 Zero the required rows. If the "diagonal block" of the matrix 544 is square and the user wishes to set the diagonal we use seperate 545 code so that MatSetValues() is not called for each diagonal allocating 546 new memory, thus calling lots of mallocs and slowing things down. 547 548 Contributed by: Mathew Knepley 549 */ 550 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 551 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 552 if (diag && (l->A->M == l->A->N)) { 553 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 554 } else if (diag) { 555 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 556 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 557 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 558 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 559 } 560 for (i = 0; i < slen; i++) { 561 row = lrows[i] + rstart; 562 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 563 } 564 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 565 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 566 } else { 567 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 568 } 569 ierr = ISDestroy(istmp);CHKERRQ(ierr); 570 ierr = PetscFree(lrows);CHKERRQ(ierr); 571 572 /* wait on sends */ 573 if (nsends) { 574 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 575 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 576 ierr = PetscFree(send_status);CHKERRQ(ierr); 577 } 578 ierr = PetscFree(send_waits);CHKERRQ(ierr); 579 ierr = PetscFree(svalues);CHKERRQ(ierr); 580 581 PetscFunctionReturn(0); 582 } 583 584 #undef __FUNCT__ 585 #define __FUNCT__ "MatMult_MPIAIJ" 586 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 587 { 588 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 589 int ierr,nt; 590 591 PetscFunctionBegin; 592 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 593 if (nt != A->n) { 594 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 595 } 596 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 597 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 598 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 599 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 600 PetscFunctionReturn(0); 601 } 602 603 #undef __FUNCT__ 604 #define __FUNCT__ "MatMultAdd_MPIAIJ" 605 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 606 { 607 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 608 int ierr; 609 610 PetscFunctionBegin; 611 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 612 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 613 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 614 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 615 PetscFunctionReturn(0); 616 } 617 618 #undef __FUNCT__ 619 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 620 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 621 { 622 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 623 int ierr; 624 625 PetscFunctionBegin; 626 /* do nondiagonal part */ 627 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 628 /* send it on its way */ 629 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 630 /* do local part */ 631 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 632 /* receive remote parts: note this assumes the values are not actually */ 633 /* inserted in yy until the next line, which is true for my implementation*/ 634 /* but is not perhaps always true. */ 635 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 636 PetscFunctionReturn(0); 637 } 638 639 EXTERN_C_BEGIN 640 #undef __FUNCT__ 641 #define __FUNCT__ "MatIsSymmetric_MPIAIJ" 642 int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f) 643 { 644 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 645 Mat A = Aij->A, B,Aoff = Aij->B,Boff; 646 MatType type; 647 IS Me,Notme; 648 int M,N,first,last,*notme,i, ierr; 649 650 PetscFunctionBegin; 651 652 /* Compatible types */ 653 ierr = MatGetType(Bmat,&type); CHKERRQ(ierr); 654 ierr = PetscStrcmp(type,MATMPIAIJ,f); CHKERRQ(ierr); 655 if (!*f) SETERRQ(1,"Second matrix needs to be MPIAIJ too"); 656 657 /* Easy test: symmetric diagonal block */ 658 Bij = (Mat_MPIAIJ *) Bmat->data; B = Bij->B; 659 ierr = MatIsSymmetric(A,B,f); CHKERRQ(ierr); 660 if (!*f) PetscFunctionReturn(0); 661 662 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 663 ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr); 664 ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr); 665 ierr = PetscMalloc((N-last+first)*sizeof(int),¬me); CHKERRQ(ierr); 666 for (i=0; i<first; i++) notme[i] = i; 667 for (i=last; i<M; i++) notme[i-last+first] = i; 668 ierr = ISCreateGeneral 669 (MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr); 670 ierr = ISCreateStride 671 (MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr); 672 ierr = MatGetSubMatrix 673 (B,Notme,Me,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Boff); CHKERRQ(ierr); 674 ierr = MatIsSymmetric(Aoff,Boff,f); CHKERRQ(ierr); 675 ierr = MatDestroy(Boff); CHKERRQ(ierr); 676 ierr = ISDestroy(Me); CHKERRQ(ierr); 677 ierr = ISDestroy(Notme); CHKERRQ(ierr); 678 679 PetscFunctionReturn(0); 680 } 681 EXTERN_C_END 682 683 #undef __FUNCT__ 684 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 685 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 686 { 687 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 688 int ierr; 689 690 PetscFunctionBegin; 691 /* do nondiagonal part */ 692 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 693 /* send it on its way */ 694 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 695 /* do local part */ 696 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 697 /* receive remote parts: note this assumes the values are not actually */ 698 /* inserted in yy until the next line, which is true for my implementation*/ 699 /* but is not perhaps always true. */ 700 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 701 PetscFunctionReturn(0); 702 } 703 704 /* 705 This only works correctly for square matrices where the subblock A->A is the 706 diagonal block 707 */ 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 710 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 711 { 712 int ierr; 713 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 714 715 PetscFunctionBegin; 716 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 717 if (a->rstart != a->cstart || a->rend != a->cend) { 718 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 719 } 720 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 721 PetscFunctionReturn(0); 722 } 723 724 #undef __FUNCT__ 725 #define __FUNCT__ "MatScale_MPIAIJ" 726 int MatScale_MPIAIJ(PetscScalar *aa,Mat A) 727 { 728 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 729 int ierr; 730 731 PetscFunctionBegin; 732 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 733 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 734 PetscFunctionReturn(0); 735 } 736 737 #undef __FUNCT__ 738 #define __FUNCT__ "MatDestroy_MPIAIJ" 739 int MatDestroy_MPIAIJ(Mat mat) 740 { 741 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 742 int ierr; 743 744 PetscFunctionBegin; 745 #if defined(PETSC_USE_LOG) 746 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 747 #endif 748 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 749 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 750 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 751 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 752 #if defined (PETSC_USE_CTABLE) 753 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 754 #else 755 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 756 #endif 757 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 758 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 759 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 760 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 761 ierr = PetscFree(aij);CHKERRQ(ierr); 762 PetscFunctionReturn(0); 763 } 764 765 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer); 766 extern int MatFactorInfo_Spooles(Mat,PetscViewer); 767 768 #undef __FUNCT__ 769 #define __FUNCT__ "MatView_MPIAIJ_Binary" 770 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 771 { 772 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 773 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 774 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 775 int nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag; 776 int nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 777 PetscScalar *column_values; 778 779 PetscFunctionBegin; 780 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 781 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 782 nz = A->nz + B->nz; 783 if (rank == 0) { 784 header[0] = MAT_FILE_COOKIE; 785 header[1] = mat->M; 786 header[2] = mat->N; 787 ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 788 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 789 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr); 790 /* get largest number of rows any processor has */ 791 rlen = mat->m; 792 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 793 for (i=1; i<size; i++) { 794 rlen = PetscMax(rlen,range[i+1] - range[i]); 795 } 796 } else { 797 ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 798 rlen = mat->m; 799 } 800 801 /* load up the local row counts */ 802 ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr); 803 for (i=0; i<mat->m; i++) { 804 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 805 } 806 807 /* store the row lengths to the file */ 808 if (rank == 0) { 809 MPI_Status status; 810 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr); 811 for (i=1; i<size; i++) { 812 rlen = range[i+1] - range[i]; 813 ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 814 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr); 815 } 816 } else { 817 ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 818 } 819 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 820 821 /* load up the local column indices */ 822 nzmax = nz; /* )th processor needs space a largest processor needs */ 823 ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 824 ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr); 825 cnt = 0; 826 for (i=0; i<mat->m; i++) { 827 for (j=B->i[i]; j<B->i[i+1]; j++) { 828 if ( (col = garray[B->j[j]]) > cstart) break; 829 column_indices[cnt++] = col; 830 } 831 for (k=A->i[i]; k<A->i[i+1]; k++) { 832 column_indices[cnt++] = A->j[k] + cstart; 833 } 834 for (; j<B->i[i+1]; j++) { 835 column_indices[cnt++] = garray[B->j[j]]; 836 } 837 } 838 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 839 840 /* store the column indices to the file */ 841 if (rank == 0) { 842 MPI_Status status; 843 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr); 844 for (i=1; i<size; i++) { 845 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 846 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 847 ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 848 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr); 849 } 850 } else { 851 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 852 ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 853 } 854 ierr = PetscFree(column_indices);CHKERRQ(ierr); 855 856 /* load up the local column values */ 857 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 858 cnt = 0; 859 for (i=0; i<mat->m; i++) { 860 for (j=B->i[i]; j<B->i[i+1]; j++) { 861 if ( garray[B->j[j]] > cstart) break; 862 column_values[cnt++] = B->a[j]; 863 } 864 for (k=A->i[i]; k<A->i[i+1]; k++) { 865 column_values[cnt++] = A->a[k]; 866 } 867 for (; j<B->i[i+1]; j++) { 868 column_values[cnt++] = B->a[j]; 869 } 870 } 871 if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 872 873 /* store the column values to the file */ 874 if (rank == 0) { 875 MPI_Status status; 876 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr); 877 for (i=1; i<size; i++) { 878 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 879 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 880 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 881 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr); 882 } 883 } else { 884 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 885 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 886 } 887 ierr = PetscFree(column_values);CHKERRQ(ierr); 888 PetscFunctionReturn(0); 889 } 890 891 #undef __FUNCT__ 892 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 893 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 894 { 895 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 896 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 897 int ierr,shift = C->indexshift,rank = aij->rank,size = aij->size; 898 PetscTruth isdraw,isascii,flg,isbinary; 899 PetscViewer sviewer; 900 PetscViewerFormat format; 901 902 PetscFunctionBegin; 903 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 904 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 905 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 906 if (isascii) { 907 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 908 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 909 MatInfo info; 910 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 911 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 912 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 913 if (flg) { 914 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 915 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 916 } else { 917 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 918 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 919 } 920 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 921 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 922 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 923 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 924 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 925 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 926 PetscFunctionReturn(0); 927 } else if (format == PETSC_VIEWER_ASCII_INFO) { 928 PetscFunctionReturn(0); 929 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 930 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE) 931 ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr); 932 #endif 933 #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE) 934 ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr); 935 #endif 936 937 PetscFunctionReturn(0); 938 } 939 } else if (isbinary) { 940 if (size == 1) { 941 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 942 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 943 } else { 944 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 945 } 946 PetscFunctionReturn(0); 947 } else if (isdraw) { 948 PetscDraw draw; 949 PetscTruth isnull; 950 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 951 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 952 } 953 954 if (size == 1) { 955 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 956 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 957 } else { 958 /* assemble the entire matrix onto first processor. */ 959 Mat A; 960 Mat_SeqAIJ *Aloc; 961 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 962 PetscScalar *a; 963 964 if (!rank) { 965 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 966 } else { 967 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 968 } 969 PetscLogObjectParent(mat,A); 970 971 /* copy over the A part */ 972 Aloc = (Mat_SeqAIJ*)aij->A->data; 973 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 974 row = aij->rstart; 975 for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;} 976 for (i=0; i<m; i++) { 977 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 978 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 979 } 980 aj = Aloc->j; 981 for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;} 982 983 /* copy over the B part */ 984 Aloc = (Mat_SeqAIJ*)aij->B->data; 985 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 986 row = aij->rstart; 987 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 988 ct = cols; 989 for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];} 990 for (i=0; i<m; i++) { 991 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 992 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 993 } 994 ierr = PetscFree(ct);CHKERRQ(ierr); 995 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 996 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 997 /* 998 Everyone has to call to draw the matrix since the graphics waits are 999 synchronized across all processors that share the PetscDraw object 1000 */ 1001 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1002 if (!rank) { 1003 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1004 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1005 } 1006 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1007 ierr = MatDestroy(A);CHKERRQ(ierr); 1008 } 1009 PetscFunctionReturn(0); 1010 } 1011 1012 #undef __FUNCT__ 1013 #define __FUNCT__ "MatView_MPIAIJ" 1014 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1015 { 1016 int ierr; 1017 PetscTruth isascii,isdraw,issocket,isbinary; 1018 1019 PetscFunctionBegin; 1020 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 1021 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1022 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1023 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1024 if (isascii || isdraw || isbinary || issocket) { 1025 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1026 } else { 1027 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1028 } 1029 PetscFunctionReturn(0); 1030 } 1031 1032 1033 1034 #undef __FUNCT__ 1035 #define __FUNCT__ "MatRelax_MPIAIJ" 1036 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1037 { 1038 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1039 int ierr; 1040 Vec bb1; 1041 PetscScalar mone=-1.0; 1042 1043 PetscFunctionBegin; 1044 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1045 1046 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1047 1048 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1049 if (flag & SOR_ZERO_INITIAL_GUESS) { 1050 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1051 its--; 1052 } 1053 1054 while (its--) { 1055 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1056 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1057 1058 /* update rhs: bb1 = bb - B*x */ 1059 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1060 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1061 1062 /* local sweep */ 1063 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1064 CHKERRQ(ierr); 1065 } 1066 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1067 if (flag & SOR_ZERO_INITIAL_GUESS) { 1068 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1069 its--; 1070 } 1071 while (its--) { 1072 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1073 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1074 1075 /* update rhs: bb1 = bb - B*x */ 1076 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1077 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1078 1079 /* local sweep */ 1080 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1081 CHKERRQ(ierr); 1082 } 1083 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1084 if (flag & SOR_ZERO_INITIAL_GUESS) { 1085 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1086 its--; 1087 } 1088 while (its--) { 1089 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1090 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1091 1092 /* update rhs: bb1 = bb - B*x */ 1093 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1094 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1095 1096 /* local sweep */ 1097 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1098 CHKERRQ(ierr); 1099 } 1100 } else { 1101 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1102 } 1103 1104 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1105 PetscFunctionReturn(0); 1106 } 1107 1108 #undef __FUNCT__ 1109 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1110 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1111 { 1112 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1113 Mat A = mat->A,B = mat->B; 1114 int ierr; 1115 PetscReal isend[5],irecv[5]; 1116 1117 PetscFunctionBegin; 1118 info->block_size = 1.0; 1119 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1120 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1121 isend[3] = info->memory; isend[4] = info->mallocs; 1122 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1123 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1124 isend[3] += info->memory; isend[4] += info->mallocs; 1125 if (flag == MAT_LOCAL) { 1126 info->nz_used = isend[0]; 1127 info->nz_allocated = isend[1]; 1128 info->nz_unneeded = isend[2]; 1129 info->memory = isend[3]; 1130 info->mallocs = isend[4]; 1131 } else if (flag == MAT_GLOBAL_MAX) { 1132 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1133 info->nz_used = irecv[0]; 1134 info->nz_allocated = irecv[1]; 1135 info->nz_unneeded = irecv[2]; 1136 info->memory = irecv[3]; 1137 info->mallocs = irecv[4]; 1138 } else if (flag == MAT_GLOBAL_SUM) { 1139 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1140 info->nz_used = irecv[0]; 1141 info->nz_allocated = irecv[1]; 1142 info->nz_unneeded = irecv[2]; 1143 info->memory = irecv[3]; 1144 info->mallocs = irecv[4]; 1145 } 1146 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1147 info->fill_ratio_needed = 0; 1148 info->factor_mallocs = 0; 1149 info->rows_global = (double)matin->M; 1150 info->columns_global = (double)matin->N; 1151 info->rows_local = (double)matin->m; 1152 info->columns_local = (double)matin->N; 1153 1154 PetscFunctionReturn(0); 1155 } 1156 1157 #undef __FUNCT__ 1158 #define __FUNCT__ "MatSetOption_MPIAIJ" 1159 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1160 { 1161 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1162 int ierr; 1163 1164 PetscFunctionBegin; 1165 switch (op) { 1166 case MAT_NO_NEW_NONZERO_LOCATIONS: 1167 case MAT_YES_NEW_NONZERO_LOCATIONS: 1168 case MAT_COLUMNS_UNSORTED: 1169 case MAT_COLUMNS_SORTED: 1170 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1171 case MAT_KEEP_ZEROED_ROWS: 1172 case MAT_NEW_NONZERO_LOCATION_ERR: 1173 case MAT_USE_INODES: 1174 case MAT_DO_NOT_USE_INODES: 1175 case MAT_IGNORE_ZERO_ENTRIES: 1176 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1177 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1178 break; 1179 case MAT_ROW_ORIENTED: 1180 a->roworiented = PETSC_TRUE; 1181 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1182 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1183 break; 1184 case MAT_ROWS_SORTED: 1185 case MAT_ROWS_UNSORTED: 1186 case MAT_YES_NEW_DIAGONALS: 1187 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1188 break; 1189 case MAT_COLUMN_ORIENTED: 1190 a->roworiented = PETSC_FALSE; 1191 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1192 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1193 break; 1194 case MAT_IGNORE_OFF_PROC_ENTRIES: 1195 a->donotstash = PETSC_TRUE; 1196 break; 1197 case MAT_NO_NEW_DIAGONALS: 1198 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1199 default: 1200 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1201 } 1202 PetscFunctionReturn(0); 1203 } 1204 1205 #undef __FUNCT__ 1206 #define __FUNCT__ "MatGetRow_MPIAIJ" 1207 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1208 { 1209 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1210 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1211 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1212 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1213 int *cmap,*idx_p; 1214 1215 PetscFunctionBegin; 1216 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1217 mat->getrowactive = PETSC_TRUE; 1218 1219 if (!mat->rowvalues && (idx || v)) { 1220 /* 1221 allocate enough space to hold information from the longest row. 1222 */ 1223 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1224 int max = 1,tmp; 1225 for (i=0; i<matin->m; i++) { 1226 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1227 if (max < tmp) { max = tmp; } 1228 } 1229 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1230 mat->rowindices = (int*)(mat->rowvalues + max); 1231 } 1232 1233 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1234 lrow = row - rstart; 1235 1236 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1237 if (!v) {pvA = 0; pvB = 0;} 1238 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1239 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1240 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1241 nztot = nzA + nzB; 1242 1243 cmap = mat->garray; 1244 if (v || idx) { 1245 if (nztot) { 1246 /* Sort by increasing column numbers, assuming A and B already sorted */ 1247 int imark = -1; 1248 if (v) { 1249 *v = v_p = mat->rowvalues; 1250 for (i=0; i<nzB; i++) { 1251 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1252 else break; 1253 } 1254 imark = i; 1255 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1256 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1257 } 1258 if (idx) { 1259 *idx = idx_p = mat->rowindices; 1260 if (imark > -1) { 1261 for (i=0; i<imark; i++) { 1262 idx_p[i] = cmap[cworkB[i]]; 1263 } 1264 } else { 1265 for (i=0; i<nzB; i++) { 1266 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1267 else break; 1268 } 1269 imark = i; 1270 } 1271 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1272 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1273 } 1274 } else { 1275 if (idx) *idx = 0; 1276 if (v) *v = 0; 1277 } 1278 } 1279 *nz = nztot; 1280 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1281 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1282 PetscFunctionReturn(0); 1283 } 1284 1285 #undef __FUNCT__ 1286 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1287 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1288 { 1289 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1290 1291 PetscFunctionBegin; 1292 if (aij->getrowactive == PETSC_FALSE) { 1293 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1294 } 1295 aij->getrowactive = PETSC_FALSE; 1296 PetscFunctionReturn(0); 1297 } 1298 1299 #undef __FUNCT__ 1300 #define __FUNCT__ "MatNorm_MPIAIJ" 1301 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1302 { 1303 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1304 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1305 int ierr,i,j,cstart = aij->cstart,shift = amat->indexshift; 1306 PetscReal sum = 0.0; 1307 PetscScalar *v; 1308 1309 PetscFunctionBegin; 1310 if (aij->size == 1) { 1311 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1312 } else { 1313 if (type == NORM_FROBENIUS) { 1314 v = amat->a; 1315 for (i=0; i<amat->nz; i++) { 1316 #if defined(PETSC_USE_COMPLEX) 1317 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1318 #else 1319 sum += (*v)*(*v); v++; 1320 #endif 1321 } 1322 v = bmat->a; 1323 for (i=0; i<bmat->nz; i++) { 1324 #if defined(PETSC_USE_COMPLEX) 1325 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1326 #else 1327 sum += (*v)*(*v); v++; 1328 #endif 1329 } 1330 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1331 *norm = sqrt(*norm); 1332 } else if (type == NORM_1) { /* max column norm */ 1333 PetscReal *tmp,*tmp2; 1334 int *jj,*garray = aij->garray; 1335 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1336 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1337 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1338 *norm = 0.0; 1339 v = amat->a; jj = amat->j; 1340 for (j=0; j<amat->nz; j++) { 1341 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1342 } 1343 v = bmat->a; jj = bmat->j; 1344 for (j=0; j<bmat->nz; j++) { 1345 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1346 } 1347 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1348 for (j=0; j<mat->N; j++) { 1349 if (tmp2[j] > *norm) *norm = tmp2[j]; 1350 } 1351 ierr = PetscFree(tmp);CHKERRQ(ierr); 1352 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1353 } else if (type == NORM_INFINITY) { /* max row norm */ 1354 PetscReal ntemp = 0.0; 1355 for (j=0; j<aij->A->m; j++) { 1356 v = amat->a + amat->i[j] + shift; 1357 sum = 0.0; 1358 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1359 sum += PetscAbsScalar(*v); v++; 1360 } 1361 v = bmat->a + bmat->i[j] + shift; 1362 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1363 sum += PetscAbsScalar(*v); v++; 1364 } 1365 if (sum > ntemp) ntemp = sum; 1366 } 1367 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1368 } else { 1369 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1370 } 1371 } 1372 PetscFunctionReturn(0); 1373 } 1374 1375 #undef __FUNCT__ 1376 #define __FUNCT__ "MatTranspose_MPIAIJ" 1377 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1378 { 1379 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1380 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1381 int ierr,shift = Aloc->indexshift; 1382 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1383 Mat B; 1384 PetscScalar *array; 1385 1386 PetscFunctionBegin; 1387 if (!matout && M != N) { 1388 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1389 } 1390 1391 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1392 1393 /* copy over the A part */ 1394 Aloc = (Mat_SeqAIJ*)a->A->data; 1395 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1396 row = a->rstart; 1397 for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;} 1398 for (i=0; i<m; i++) { 1399 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1400 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1401 } 1402 aj = Aloc->j; 1403 for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;} 1404 1405 /* copy over the B part */ 1406 Aloc = (Mat_SeqAIJ*)a->B->data; 1407 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1408 row = a->rstart; 1409 ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr); 1410 ct = cols; 1411 for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];} 1412 for (i=0; i<m; i++) { 1413 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1414 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1415 } 1416 ierr = PetscFree(ct);CHKERRQ(ierr); 1417 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1418 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1419 if (matout) { 1420 *matout = B; 1421 } else { 1422 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1423 } 1424 PetscFunctionReturn(0); 1425 } 1426 1427 #undef __FUNCT__ 1428 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1429 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1430 { 1431 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1432 Mat a = aij->A,b = aij->B; 1433 int ierr,s1,s2,s3; 1434 1435 PetscFunctionBegin; 1436 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1437 if (rr) { 1438 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1439 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1440 /* Overlap communication with computation. */ 1441 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1442 } 1443 if (ll) { 1444 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1445 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1446 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1447 } 1448 /* scale the diagonal block */ 1449 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1450 1451 if (rr) { 1452 /* Do a scatter end and then right scale the off-diagonal block */ 1453 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1454 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1455 } 1456 1457 PetscFunctionReturn(0); 1458 } 1459 1460 1461 #undef __FUNCT__ 1462 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1463 int MatPrintHelp_MPIAIJ(Mat A) 1464 { 1465 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1466 int ierr; 1467 1468 PetscFunctionBegin; 1469 if (!a->rank) { 1470 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1471 } 1472 PetscFunctionReturn(0); 1473 } 1474 1475 #undef __FUNCT__ 1476 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1477 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1478 { 1479 PetscFunctionBegin; 1480 *bs = 1; 1481 PetscFunctionReturn(0); 1482 } 1483 #undef __FUNCT__ 1484 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1485 int MatSetUnfactored_MPIAIJ(Mat A) 1486 { 1487 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1488 int ierr; 1489 1490 PetscFunctionBegin; 1491 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1492 PetscFunctionReturn(0); 1493 } 1494 1495 #undef __FUNCT__ 1496 #define __FUNCT__ "MatEqual_MPIAIJ" 1497 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1498 { 1499 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1500 Mat a,b,c,d; 1501 PetscTruth flg; 1502 int ierr; 1503 1504 PetscFunctionBegin; 1505 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1506 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1507 a = matA->A; b = matA->B; 1508 c = matB->A; d = matB->B; 1509 1510 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1511 if (flg == PETSC_TRUE) { 1512 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1513 } 1514 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1515 PetscFunctionReturn(0); 1516 } 1517 1518 #undef __FUNCT__ 1519 #define __FUNCT__ "MatCopy_MPIAIJ" 1520 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1521 { 1522 int ierr; 1523 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1524 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1525 PetscTruth flg; 1526 1527 PetscFunctionBegin; 1528 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1529 if (str != SAME_NONZERO_PATTERN || !flg) { 1530 /* because of the column compression in the off-processor part of the matrix a->B, 1531 the number of columns in a->B and b->B may be different, hence we cannot call 1532 the MatCopy() directly on the two parts. If need be, we can provide a more 1533 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1534 then copying the submatrices */ 1535 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1536 } else { 1537 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1538 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1539 } 1540 PetscFunctionReturn(0); 1541 } 1542 1543 #undef __FUNCT__ 1544 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1545 int MatSetUpPreallocation_MPIAIJ(Mat A) 1546 { 1547 int ierr; 1548 1549 PetscFunctionBegin; 1550 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1551 PetscFunctionReturn(0); 1552 } 1553 1554 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1555 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1556 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1557 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **); 1558 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1559 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1560 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*); 1561 #endif 1562 1563 #include "petscblaslapack.h" 1564 extern int MatAXPY_SeqAIJ(PetscScalar *,Mat,Mat,MatStructure); 1565 #undef __FUNCT__ 1566 #define __FUNCT__ "MatAXPY_MPIAIJ" 1567 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1568 { 1569 int ierr,one=1,i; 1570 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1571 Mat_SeqAIJ *x,*y; 1572 1573 PetscFunctionBegin; 1574 if (str == SAME_NONZERO_PATTERN) { 1575 x = (Mat_SeqAIJ *)xx->A->data; 1576 y = (Mat_SeqAIJ *)yy->A->data; 1577 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1578 x = (Mat_SeqAIJ *)xx->B->data; 1579 y = (Mat_SeqAIJ *)yy->B->data; 1580 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1581 } else if (str == SUBSET_NONZERO_PATTERN) { 1582 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1583 1584 x = (Mat_SeqAIJ *)xx->B->data; 1585 y = (Mat_SeqAIJ *)yy->B->data; 1586 if (y->xtoy && y->XtoY != xx->B) { 1587 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1588 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1589 } 1590 if (!y->xtoy) { /* get xtoy */ 1591 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1592 y->XtoY = xx->B; 1593 } 1594 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1595 } else { 1596 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1597 } 1598 PetscFunctionReturn(0); 1599 } 1600 1601 /* -------------------------------------------------------------------*/ 1602 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1603 MatGetRow_MPIAIJ, 1604 MatRestoreRow_MPIAIJ, 1605 MatMult_MPIAIJ, 1606 MatMultAdd_MPIAIJ, 1607 MatMultTranspose_MPIAIJ, 1608 MatMultTransposeAdd_MPIAIJ, 1609 0, 1610 0, 1611 0, 1612 0, 1613 0, 1614 0, 1615 MatRelax_MPIAIJ, 1616 MatTranspose_MPIAIJ, 1617 MatGetInfo_MPIAIJ, 1618 MatEqual_MPIAIJ, 1619 MatGetDiagonal_MPIAIJ, 1620 MatDiagonalScale_MPIAIJ, 1621 MatNorm_MPIAIJ, 1622 MatAssemblyBegin_MPIAIJ, 1623 MatAssemblyEnd_MPIAIJ, 1624 0, 1625 MatSetOption_MPIAIJ, 1626 MatZeroEntries_MPIAIJ, 1627 MatZeroRows_MPIAIJ, 1628 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1629 MatLUFactorSymbolic_MPIAIJ_TFS, 1630 #else 1631 0, 1632 #endif 1633 0, 1634 0, 1635 0, 1636 MatSetUpPreallocation_MPIAIJ, 1637 0, 1638 0, 1639 0, 1640 0, 1641 MatDuplicate_MPIAIJ, 1642 0, 1643 0, 1644 0, 1645 0, 1646 MatAXPY_MPIAIJ, 1647 MatGetSubMatrices_MPIAIJ, 1648 MatIncreaseOverlap_MPIAIJ, 1649 MatGetValues_MPIAIJ, 1650 MatCopy_MPIAIJ, 1651 MatPrintHelp_MPIAIJ, 1652 MatScale_MPIAIJ, 1653 0, 1654 0, 1655 0, 1656 MatGetBlockSize_MPIAIJ, 1657 0, 1658 0, 1659 0, 1660 0, 1661 MatFDColoringCreate_MPIAIJ, 1662 0, 1663 MatSetUnfactored_MPIAIJ, 1664 0, 1665 0, 1666 MatGetSubMatrix_MPIAIJ, 1667 MatDestroy_MPIAIJ, 1668 MatView_MPIAIJ, 1669 MatGetPetscMaps_Petsc, 1670 0, 1671 0, 1672 0, 1673 0, 1674 0, 1675 0, 1676 0, 1677 0, 1678 MatSetColoring_MPIAIJ, 1679 MatSetValuesAdic_MPIAIJ, 1680 MatSetValuesAdifor_MPIAIJ 1681 }; 1682 1683 /* ----------------------------------------------------------------------------------------*/ 1684 1685 EXTERN_C_BEGIN 1686 #undef __FUNCT__ 1687 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1688 int MatStoreValues_MPIAIJ(Mat mat) 1689 { 1690 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1691 int ierr; 1692 1693 PetscFunctionBegin; 1694 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1695 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1696 PetscFunctionReturn(0); 1697 } 1698 EXTERN_C_END 1699 1700 EXTERN_C_BEGIN 1701 #undef __FUNCT__ 1702 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1703 int MatRetrieveValues_MPIAIJ(Mat mat) 1704 { 1705 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1706 int ierr; 1707 1708 PetscFunctionBegin; 1709 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1710 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1711 PetscFunctionReturn(0); 1712 } 1713 EXTERN_C_END 1714 1715 #include "petscpc.h" 1716 EXTERN_C_BEGIN 1717 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1718 EXTERN_C_END 1719 1720 EXTERN_C_BEGIN 1721 #undef __FUNCT__ 1722 #define __FUNCT__ "MatCreate_MPIAIJ" 1723 int MatCreate_MPIAIJ(Mat B) 1724 { 1725 Mat_MPIAIJ *b; 1726 int ierr,i,size; 1727 1728 PetscFunctionBegin; 1729 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1730 1731 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1732 B->data = (void*)b; 1733 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1734 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1735 B->factor = 0; 1736 B->assembled = PETSC_FALSE; 1737 B->mapping = 0; 1738 1739 B->insertmode = NOT_SET_VALUES; 1740 b->size = size; 1741 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1742 1743 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1744 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1745 1746 /* the information in the maps duplicates the information computed below, eventually 1747 we should remove the duplicate information that is not contained in the maps */ 1748 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1749 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1750 1751 /* build local table of row and column ownerships */ 1752 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1753 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1754 b->cowners = b->rowners + b->size + 2; 1755 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1756 b->rowners[0] = 0; 1757 for (i=2; i<=b->size; i++) { 1758 b->rowners[i] += b->rowners[i-1]; 1759 } 1760 b->rstart = b->rowners[b->rank]; 1761 b->rend = b->rowners[b->rank+1]; 1762 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1763 b->cowners[0] = 0; 1764 for (i=2; i<=b->size; i++) { 1765 b->cowners[i] += b->cowners[i-1]; 1766 } 1767 b->cstart = b->cowners[b->rank]; 1768 b->cend = b->cowners[b->rank+1]; 1769 1770 /* build cache for off array entries formed */ 1771 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1772 b->donotstash = PETSC_FALSE; 1773 b->colmap = 0; 1774 b->garray = 0; 1775 b->roworiented = PETSC_TRUE; 1776 1777 /* stuff used for matrix vector multiply */ 1778 b->lvec = PETSC_NULL; 1779 b->Mvctx = PETSC_NULL; 1780 1781 /* stuff for MatGetRow() */ 1782 b->rowindices = 0; 1783 b->rowvalues = 0; 1784 b->getrowactive = PETSC_FALSE; 1785 1786 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1787 "MatStoreValues_MPIAIJ", 1788 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1789 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1790 "MatRetrieveValues_MPIAIJ", 1791 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1792 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1793 "MatGetDiagonalBlock_MPIAIJ", 1794 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1795 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C", 1796 "MatIsSymmetric_MPIAIJ", 1797 MatIsSymmetric_MPIAIJ); CHKERRQ(ierr); 1798 PetscFunctionReturn(0); 1799 } 1800 EXTERN_C_END 1801 1802 #undef __FUNCT__ 1803 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1804 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1805 { 1806 Mat mat; 1807 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1808 int ierr; 1809 1810 PetscFunctionBegin; 1811 *newmat = 0; 1812 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1813 ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1814 a = (Mat_MPIAIJ*)mat->data; 1815 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1816 mat->factor = matin->factor; 1817 mat->assembled = PETSC_TRUE; 1818 mat->insertmode = NOT_SET_VALUES; 1819 mat->preallocated = PETSC_TRUE; 1820 1821 a->rstart = oldmat->rstart; 1822 a->rend = oldmat->rend; 1823 a->cstart = oldmat->cstart; 1824 a->cend = oldmat->cend; 1825 a->size = oldmat->size; 1826 a->rank = oldmat->rank; 1827 a->donotstash = oldmat->donotstash; 1828 a->roworiented = oldmat->roworiented; 1829 a->rowindices = 0; 1830 a->rowvalues = 0; 1831 a->getrowactive = PETSC_FALSE; 1832 1833 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1834 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1835 if (oldmat->colmap) { 1836 #if defined (PETSC_USE_CTABLE) 1837 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1838 #else 1839 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1840 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1841 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1842 #endif 1843 } else a->colmap = 0; 1844 if (oldmat->garray) { 1845 int len; 1846 len = oldmat->B->n; 1847 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1848 PetscLogObjectMemory(mat,len*sizeof(int)); 1849 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1850 } else a->garray = 0; 1851 1852 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1853 PetscLogObjectParent(mat,a->lvec); 1854 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1855 PetscLogObjectParent(mat,a->Mvctx); 1856 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1857 PetscLogObjectParent(mat,a->A); 1858 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1859 PetscLogObjectParent(mat,a->B); 1860 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1861 *newmat = mat; 1862 PetscFunctionReturn(0); 1863 } 1864 1865 #include "petscsys.h" 1866 1867 EXTERN_C_BEGIN 1868 #undef __FUNCT__ 1869 #define __FUNCT__ "MatLoad_MPIAIJ" 1870 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat) 1871 { 1872 Mat A; 1873 PetscScalar *vals,*svals; 1874 MPI_Comm comm = ((PetscObject)viewer)->comm; 1875 MPI_Status status; 1876 int i,nz,ierr,j,rstart,rend,fd; 1877 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1878 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1879 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1880 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_SUPERLUDIST) 1881 PetscTruth flag; 1882 #endif 1883 1884 PetscFunctionBegin; 1885 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1886 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1887 if (!rank) { 1888 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1889 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1890 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1891 if (header[3] < 0) { 1892 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1893 } 1894 } 1895 1896 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1897 M = header[1]; N = header[2]; 1898 /* determine ownership of all rows */ 1899 m = M/size + ((M % size) > rank); 1900 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1901 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1902 rowners[0] = 0; 1903 for (i=2; i<=size; i++) { 1904 rowners[i] += rowners[i-1]; 1905 } 1906 rstart = rowners[rank]; 1907 rend = rowners[rank+1]; 1908 1909 /* distribute row lengths to all processors */ 1910 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1911 offlens = ourlens + (rend-rstart); 1912 if (!rank) { 1913 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1914 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1915 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1916 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1917 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1918 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1919 } else { 1920 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1921 } 1922 1923 if (!rank) { 1924 /* calculate the number of nonzeros on each processor */ 1925 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1926 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1927 for (i=0; i<size; i++) { 1928 for (j=rowners[i]; j< rowners[i+1]; j++) { 1929 procsnz[i] += rowlengths[j]; 1930 } 1931 } 1932 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1933 1934 /* determine max buffer needed and allocate it */ 1935 maxnz = 0; 1936 for (i=0; i<size; i++) { 1937 maxnz = PetscMax(maxnz,procsnz[i]); 1938 } 1939 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1940 1941 /* read in my part of the matrix column indices */ 1942 nz = procsnz[0]; 1943 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1944 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1945 1946 /* read in every one elses and ship off */ 1947 for (i=1; i<size; i++) { 1948 nz = procsnz[i]; 1949 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1950 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1951 } 1952 ierr = PetscFree(cols);CHKERRQ(ierr); 1953 } else { 1954 /* determine buffer space needed for message */ 1955 nz = 0; 1956 for (i=0; i<m; i++) { 1957 nz += ourlens[i]; 1958 } 1959 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 1960 1961 /* receive message of column indices*/ 1962 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1963 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1964 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1965 } 1966 1967 /* determine column ownership if matrix is not square */ 1968 if (N != M) { 1969 n = N/size + ((N % size) > rank); 1970 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1971 cstart = cend - n; 1972 } else { 1973 cstart = rstart; 1974 cend = rend; 1975 n = cend - cstart; 1976 } 1977 1978 /* loop over local rows, determining number of off diagonal entries */ 1979 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 1980 jj = 0; 1981 for (i=0; i<m; i++) { 1982 for (j=0; j<ourlens[i]; j++) { 1983 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1984 jj++; 1985 } 1986 } 1987 1988 /* create our matrix */ 1989 for (i=0; i<m; i++) { 1990 ourlens[i] -= offlens[i]; 1991 } 1992 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1993 A = *newmat; 1994 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1995 for (i=0; i<m; i++) { 1996 ourlens[i] += offlens[i]; 1997 } 1998 1999 if (!rank) { 2000 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2001 2002 /* read in my part of the matrix numerical values */ 2003 nz = procsnz[0]; 2004 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2005 2006 /* insert into matrix */ 2007 jj = rstart; 2008 smycols = mycols; 2009 svals = vals; 2010 for (i=0; i<m; i++) { 2011 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2012 smycols += ourlens[i]; 2013 svals += ourlens[i]; 2014 jj++; 2015 } 2016 2017 /* read in other processors and ship out */ 2018 for (i=1; i<size; i++) { 2019 nz = procsnz[i]; 2020 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2021 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2022 } 2023 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2024 } else { 2025 /* receive numeric values */ 2026 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2027 2028 /* receive message of values*/ 2029 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2030 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2031 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2032 2033 /* insert into matrix */ 2034 jj = rstart; 2035 smycols = mycols; 2036 svals = vals; 2037 for (i=0; i<m; i++) { 2038 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2039 smycols += ourlens[i]; 2040 svals += ourlens[i]; 2041 jj++; 2042 } 2043 } 2044 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2045 ierr = PetscFree(vals);CHKERRQ(ierr); 2046 ierr = PetscFree(mycols);CHKERRQ(ierr); 2047 ierr = PetscFree(rowners);CHKERRQ(ierr); 2048 2049 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2050 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2051 #if defined(PETSC_HAVE_SPOOLES) 2052 ierr = PetscOptionsHasName(A->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr); 2053 if (flag) { 2054 if (size == 1) { 2055 ierr = MatUseSpooles_SeqAIJ(A);CHKERRQ(ierr); 2056 } else { 2057 ierr = MatUseSpooles_MPIAIJ(A);CHKERRQ(ierr); 2058 } 2059 } 2060 #endif 2061 #if defined(PETSC_HAVE_SUPERLUDIST) 2062 ierr = PetscOptionsHasName(A->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr); 2063 if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr); } 2064 #endif 2065 PetscFunctionReturn(0); 2066 } 2067 EXTERN_C_END 2068 2069 #undef __FUNCT__ 2070 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2071 /* 2072 Not great since it makes two copies of the submatrix, first an SeqAIJ 2073 in local and then by concatenating the local matrices the end result. 2074 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2075 */ 2076 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2077 { 2078 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2079 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2080 Mat *local,M,Mreuse; 2081 PetscScalar *vwork,*aa; 2082 MPI_Comm comm = mat->comm; 2083 Mat_SeqAIJ *aij; 2084 2085 2086 PetscFunctionBegin; 2087 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2088 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2089 2090 if (call == MAT_REUSE_MATRIX) { 2091 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2092 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 2093 local = &Mreuse; 2094 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2095 } else { 2096 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2097 Mreuse = *local; 2098 ierr = PetscFree(local);CHKERRQ(ierr); 2099 } 2100 2101 /* 2102 m - number of local rows 2103 n - number of columns (same on all processors) 2104 rstart - first row in new global matrix generated 2105 */ 2106 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2107 if (call == MAT_INITIAL_MATRIX) { 2108 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2109 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 2110 ii = aij->i; 2111 jj = aij->j; 2112 2113 /* 2114 Determine the number of non-zeros in the diagonal and off-diagonal 2115 portions of the matrix in order to do correct preallocation 2116 */ 2117 2118 /* first get start and end of "diagonal" columns */ 2119 if (csize == PETSC_DECIDE) { 2120 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2121 if (mglobal == n) { /* square matrix */ 2122 nlocal = m; 2123 } else { 2124 nlocal = n/size + ((n % size) > rank); 2125 } 2126 } else { 2127 nlocal = csize; 2128 } 2129 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2130 rstart = rend - nlocal; 2131 if (rank == size - 1 && rend != n) { 2132 SETERRQ(1,"Local column sizes do not add up to total number of columns"); 2133 } 2134 2135 /* next, compute all the lengths */ 2136 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2137 olens = dlens + m; 2138 for (i=0; i<m; i++) { 2139 jend = ii[i+1] - ii[i]; 2140 olen = 0; 2141 dlen = 0; 2142 for (j=0; j<jend; j++) { 2143 if (*jj < rstart || *jj >= rend) olen++; 2144 else dlen++; 2145 jj++; 2146 } 2147 olens[i] = olen; 2148 dlens[i] = dlen; 2149 } 2150 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2151 ierr = PetscFree(dlens);CHKERRQ(ierr); 2152 } else { 2153 int ml,nl; 2154 2155 M = *newmat; 2156 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2157 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2158 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2159 /* 2160 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2161 rather than the slower MatSetValues(). 2162 */ 2163 M->was_assembled = PETSC_TRUE; 2164 M->assembled = PETSC_FALSE; 2165 } 2166 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2167 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2168 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 2169 ii = aij->i; 2170 jj = aij->j; 2171 aa = aij->a; 2172 for (i=0; i<m; i++) { 2173 row = rstart + i; 2174 nz = ii[i+1] - ii[i]; 2175 cwork = jj; jj += nz; 2176 vwork = aa; aa += nz; 2177 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2178 } 2179 2180 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2181 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2182 *newmat = M; 2183 2184 /* save submatrix used in processor for next request */ 2185 if (call == MAT_INITIAL_MATRIX) { 2186 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2187 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2188 } 2189 2190 PetscFunctionReturn(0); 2191 } 2192 2193 #undef __FUNCT__ 2194 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2195 /*@C 2196 MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format 2197 (the default parallel PETSc format). For good matrix assembly performance 2198 the user should preallocate the matrix storage by setting the parameters 2199 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2200 performance can be increased by more than a factor of 50. 2201 2202 Collective on MPI_Comm 2203 2204 Input Parameters: 2205 + A - the matrix 2206 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2207 (same value is used for all local rows) 2208 . d_nnz - array containing the number of nonzeros in the various rows of the 2209 DIAGONAL portion of the local submatrix (possibly different for each row) 2210 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2211 The size of this array is equal to the number of local rows, i.e 'm'. 2212 You must leave room for the diagonal entry even if it is zero. 2213 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2214 submatrix (same value is used for all local rows). 2215 - o_nnz - array containing the number of nonzeros in the various rows of the 2216 OFF-DIAGONAL portion of the local submatrix (possibly different for 2217 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2218 structure. The size of this array is equal to the number 2219 of local rows, i.e 'm'. 2220 2221 The AIJ format (also called the Yale sparse matrix format or 2222 compressed row storage), is fully compatible with standard Fortran 77 2223 storage. That is, the stored row and column indices can begin at 2224 either one (as in Fortran) or zero. See the users manual for details. 2225 2226 The user MUST specify either the local or global matrix dimensions 2227 (possibly both). 2228 2229 The parallel matrix is partitioned such that the first m0 rows belong to 2230 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2231 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2232 2233 The DIAGONAL portion of the local submatrix of a processor can be defined 2234 as the submatrix which is obtained by extraction the part corresponding 2235 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2236 first row that belongs to the processor, and r2 is the last row belonging 2237 to the this processor. This is a square mxm matrix. The remaining portion 2238 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2239 2240 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2241 2242 By default, this format uses inodes (identical nodes) when possible. 2243 We search for consecutive rows with the same nonzero structure, thereby 2244 reusing matrix information to achieve increased efficiency. 2245 2246 Options Database Keys: 2247 + -mat_aij_no_inode - Do not use inodes 2248 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2249 - -mat_aij_oneindex - Internally use indexing starting at 1 2250 rather than 0. Note that when calling MatSetValues(), 2251 the user still MUST index entries starting at 0! 2252 2253 Example usage: 2254 2255 Consider the following 8x8 matrix with 34 non-zero values, that is 2256 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2257 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2258 as follows: 2259 2260 .vb 2261 1 2 0 | 0 3 0 | 0 4 2262 Proc0 0 5 6 | 7 0 0 | 8 0 2263 9 0 10 | 11 0 0 | 12 0 2264 ------------------------------------- 2265 13 0 14 | 15 16 17 | 0 0 2266 Proc1 0 18 0 | 19 20 21 | 0 0 2267 0 0 0 | 22 23 0 | 24 0 2268 ------------------------------------- 2269 Proc2 25 26 27 | 0 0 28 | 29 0 2270 30 0 0 | 31 32 33 | 0 34 2271 .ve 2272 2273 This can be represented as a collection of submatrices as: 2274 2275 .vb 2276 A B C 2277 D E F 2278 G H I 2279 .ve 2280 2281 Where the submatrices A,B,C are owned by proc0, D,E,F are 2282 owned by proc1, G,H,I are owned by proc2. 2283 2284 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2285 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2286 The 'M','N' parameters are 8,8, and have the same values on all procs. 2287 2288 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2289 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2290 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2291 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2292 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2293 matrix, ans [DF] as another SeqAIJ matrix. 2294 2295 When d_nz, o_nz parameters are specified, d_nz storage elements are 2296 allocated for every row of the local diagonal submatrix, and o_nz 2297 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2298 One way to choose d_nz and o_nz is to use the max nonzerors per local 2299 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2300 In this case, the values of d_nz,o_nz are: 2301 .vb 2302 proc0 : dnz = 2, o_nz = 2 2303 proc1 : dnz = 3, o_nz = 2 2304 proc2 : dnz = 1, o_nz = 4 2305 .ve 2306 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2307 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2308 for proc3. i.e we are using 12+15+10=37 storage locations to store 2309 34 values. 2310 2311 When d_nnz, o_nnz parameters are specified, the storage is specified 2312 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2313 In the above case the values for d_nnz,o_nnz are: 2314 .vb 2315 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2316 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2317 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2318 .ve 2319 Here the space allocated is sum of all the above values i.e 34, and 2320 hence pre-allocation is perfect. 2321 2322 Level: intermediate 2323 2324 .keywords: matrix, aij, compressed row, sparse, parallel 2325 2326 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2327 @*/ 2328 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 2329 { 2330 Mat_MPIAIJ *b; 2331 int ierr,i; 2332 PetscTruth flg2; 2333 2334 PetscFunctionBegin; 2335 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr); 2336 if (!flg2) PetscFunctionReturn(0); 2337 B->preallocated = PETSC_TRUE; 2338 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2339 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2340 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 2341 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 2342 if (d_nnz) { 2343 for (i=0; i<B->m; i++) { 2344 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 2345 } 2346 } 2347 if (o_nnz) { 2348 for (i=0; i<B->m; i++) { 2349 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 2350 } 2351 } 2352 b = (Mat_MPIAIJ*)B->data; 2353 2354 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2355 PetscLogObjectParent(B,b->A); 2356 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2357 PetscLogObjectParent(B,b->B); 2358 2359 PetscFunctionReturn(0); 2360 } 2361 2362 #undef __FUNCT__ 2363 #define __FUNCT__ "MatCreateMPIAIJ" 2364 /*@C 2365 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2366 (the default parallel PETSc format). For good matrix assembly performance 2367 the user should preallocate the matrix storage by setting the parameters 2368 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2369 performance can be increased by more than a factor of 50. 2370 2371 Collective on MPI_Comm 2372 2373 Input Parameters: 2374 + comm - MPI communicator 2375 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2376 This value should be the same as the local size used in creating the 2377 y vector for the matrix-vector product y = Ax. 2378 . n - This value should be the same as the local size used in creating the 2379 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2380 calculated if N is given) For square matrices n is almost always m. 2381 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2382 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2383 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2384 (same value is used for all local rows) 2385 . d_nnz - array containing the number of nonzeros in the various rows of the 2386 DIAGONAL portion of the local submatrix (possibly different for each row) 2387 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2388 The size of this array is equal to the number of local rows, i.e 'm'. 2389 You must leave room for the diagonal entry even if it is zero. 2390 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2391 submatrix (same value is used for all local rows). 2392 - o_nnz - array containing the number of nonzeros in the various rows of the 2393 OFF-DIAGONAL portion of the local submatrix (possibly different for 2394 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2395 structure. The size of this array is equal to the number 2396 of local rows, i.e 'm'. 2397 2398 Output Parameter: 2399 . A - the matrix 2400 2401 Notes: 2402 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2403 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2404 storage requirements for this matrix. 2405 2406 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2407 processor than it must be used on all processors that share the object for 2408 that argument. 2409 2410 The AIJ format (also called the Yale sparse matrix format or 2411 compressed row storage), is fully compatible with standard Fortran 77 2412 storage. That is, the stored row and column indices can begin at 2413 either one (as in Fortran) or zero. See the users manual for details. 2414 2415 The user MUST specify either the local or global matrix dimensions 2416 (possibly both). 2417 2418 The parallel matrix is partitioned such that the first m0 rows belong to 2419 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2420 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2421 2422 The DIAGONAL portion of the local submatrix of a processor can be defined 2423 as the submatrix which is obtained by extraction the part corresponding 2424 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2425 first row that belongs to the processor, and r2 is the last row belonging 2426 to the this processor. This is a square mxm matrix. The remaining portion 2427 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2428 2429 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2430 2431 By default, this format uses inodes (identical nodes) when possible. 2432 We search for consecutive rows with the same nonzero structure, thereby 2433 reusing matrix information to achieve increased efficiency. 2434 2435 Options Database Keys: 2436 + -mat_aij_no_inode - Do not use inodes 2437 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2438 - -mat_aij_oneindex - Internally use indexing starting at 1 2439 rather than 0. Note that when calling MatSetValues(), 2440 the user still MUST index entries starting at 0! 2441 2442 2443 Example usage: 2444 2445 Consider the following 8x8 matrix with 34 non-zero values, that is 2446 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2447 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2448 as follows: 2449 2450 .vb 2451 1 2 0 | 0 3 0 | 0 4 2452 Proc0 0 5 6 | 7 0 0 | 8 0 2453 9 0 10 | 11 0 0 | 12 0 2454 ------------------------------------- 2455 13 0 14 | 15 16 17 | 0 0 2456 Proc1 0 18 0 | 19 20 21 | 0 0 2457 0 0 0 | 22 23 0 | 24 0 2458 ------------------------------------- 2459 Proc2 25 26 27 | 0 0 28 | 29 0 2460 30 0 0 | 31 32 33 | 0 34 2461 .ve 2462 2463 This can be represented as a collection of submatrices as: 2464 2465 .vb 2466 A B C 2467 D E F 2468 G H I 2469 .ve 2470 2471 Where the submatrices A,B,C are owned by proc0, D,E,F are 2472 owned by proc1, G,H,I are owned by proc2. 2473 2474 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2475 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2476 The 'M','N' parameters are 8,8, and have the same values on all procs. 2477 2478 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2479 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2480 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2481 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2482 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2483 matrix, ans [DF] as another SeqAIJ matrix. 2484 2485 When d_nz, o_nz parameters are specified, d_nz storage elements are 2486 allocated for every row of the local diagonal submatrix, and o_nz 2487 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2488 One way to choose d_nz and o_nz is to use the max nonzerors per local 2489 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2490 In this case, the values of d_nz,o_nz are: 2491 .vb 2492 proc0 : dnz = 2, o_nz = 2 2493 proc1 : dnz = 3, o_nz = 2 2494 proc2 : dnz = 1, o_nz = 4 2495 .ve 2496 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2497 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2498 for proc3. i.e we are using 12+15+10=37 storage locations to store 2499 34 values. 2500 2501 When d_nnz, o_nnz parameters are specified, the storage is specified 2502 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2503 In the above case the values for d_nnz,o_nnz are: 2504 .vb 2505 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2506 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2507 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2508 .ve 2509 Here the space allocated is sum of all the above values i.e 34, and 2510 hence pre-allocation is perfect. 2511 2512 Level: intermediate 2513 2514 .keywords: matrix, aij, compressed row, sparse, parallel 2515 2516 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2517 @*/ 2518 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2519 { 2520 int ierr,size; 2521 2522 PetscFunctionBegin; 2523 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2524 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2525 if (size > 1) { 2526 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2527 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2528 } else { 2529 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2530 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2531 } 2532 PetscFunctionReturn(0); 2533 } 2534 2535 #undef __FUNCT__ 2536 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2537 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap) 2538 { 2539 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2540 PetscFunctionBegin; 2541 *Ad = a->A; 2542 *Ao = a->B; 2543 *colmap = a->garray; 2544 PetscFunctionReturn(0); 2545 } 2546 2547 #undef __FUNCT__ 2548 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2549 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2550 { 2551 int ierr,i; 2552 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2553 2554 PetscFunctionBegin; 2555 if (coloring->ctype == IS_COLORING_LOCAL) { 2556 ISColoringValue *allcolors,*colors; 2557 ISColoring ocoloring; 2558 2559 /* set coloring for diagonal portion */ 2560 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2561 2562 /* set coloring for off-diagonal portion */ 2563 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2564 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2565 for (i=0; i<a->B->n; i++) { 2566 colors[i] = allcolors[a->garray[i]]; 2567 } 2568 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2569 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2570 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2571 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2572 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2573 ISColoringValue *colors; 2574 int *larray; 2575 ISColoring ocoloring; 2576 2577 /* set coloring for diagonal portion */ 2578 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2579 for (i=0; i<a->A->n; i++) { 2580 larray[i] = i + a->cstart; 2581 } 2582 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2583 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2584 for (i=0; i<a->A->n; i++) { 2585 colors[i] = coloring->colors[larray[i]]; 2586 } 2587 ierr = PetscFree(larray);CHKERRQ(ierr); 2588 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2589 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2590 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2591 2592 /* set coloring for off-diagonal portion */ 2593 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2594 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2595 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2596 for (i=0; i<a->B->n; i++) { 2597 colors[i] = coloring->colors[larray[i]]; 2598 } 2599 ierr = PetscFree(larray);CHKERRQ(ierr); 2600 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2601 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2602 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2603 } else { 2604 SETERRQ1(1,"No support ISColoringType %d",coloring->ctype); 2605 } 2606 2607 PetscFunctionReturn(0); 2608 } 2609 2610 #undef __FUNCT__ 2611 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2612 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2613 { 2614 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2615 int ierr; 2616 2617 PetscFunctionBegin; 2618 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2619 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2620 PetscFunctionReturn(0); 2621 } 2622 2623 #undef __FUNCT__ 2624 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2625 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2626 { 2627 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2628 int ierr; 2629 2630 PetscFunctionBegin; 2631 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2632 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2633 PetscFunctionReturn(0); 2634 } 2635 2636 #undef __FUNCT__ 2637 #define __FUNCT__ "MatMerge" 2638 /*@C 2639 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2640 matrices from each processor 2641 2642 Collective on MPI_Comm 2643 2644 Input Parameters: 2645 + comm - the communicators the parallel matrix will live on 2646 - inmat - the input sequential matrices 2647 2648 Output Parameter: 2649 . outmat - the parallel matrix generated 2650 2651 Level: advanced 2652 2653 Notes: The number of columns of the matrix in EACH of the seperate files 2654 MUST be the same. 2655 2656 @*/ 2657 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat) 2658 { 2659 int ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz; 2660 PetscScalar *values; 2661 PetscMap columnmap,rowmap; 2662 2663 PetscFunctionBegin; 2664 2665 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr); 2666 2667 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2668 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2669 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr); 2670 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2671 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2672 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2673 2674 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2675 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2676 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2677 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2678 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2679 2680 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2681 for (i=0;i<m;i++) { 2682 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2683 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2684 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2685 } 2686 ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr); 2687 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2688 2689 for (i=0;i<m;i++) { 2690 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2691 I = i + rstart; 2692 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2693 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2694 } 2695 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2696 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2697 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2698 2699 PetscFunctionReturn(0); 2700 } 2701 2702 #undef __FUNCT__ 2703 #define __FUNCT__ "MatFileSplit" 2704 int MatFileSplit(Mat A,char *outfile) 2705 { 2706 int ierr,rank,len,m,N,i,rstart,*indx,nnz; 2707 PetscViewer out; 2708 char *name; 2709 Mat B; 2710 PetscScalar *values; 2711 2712 PetscFunctionBegin; 2713 2714 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2715 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2716 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr); 2717 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2718 for (i=0;i<m;i++) { 2719 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2720 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2721 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2722 } 2723 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2724 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2725 2726 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2727 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2728 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2729 sprintf(name,"%s.%d",outfile,rank); 2730 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr); 2731 ierr = PetscFree(name); 2732 ierr = MatView(B,out);CHKERRQ(ierr); 2733 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2734 ierr = MatDestroy(B);CHKERRQ(ierr); 2735 PetscFunctionReturn(0); 2736 } 2737