/*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
EXTERN int DisAssemble_MPIAIJ(Mat);
EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatPrintHelp_SeqAIJ(Mat);

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
   at a slightly higher hash-table cost; without it, it is not scalable (each
   processor has an order-N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
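/*
   The map is stored shifted by one so that a retrieved value of 0 means
   "this global column has no local counterpart".  An illustrative lookup
   of a global column gcol (the same pattern appears in
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below):

#if defined (PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
      col--;
#else
      col = aij->colmap[gcol] - 1;
#endif

   leaving col == -1 when gcol is not present in the off-diagonal part.
*/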
#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}
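/*
   Both macros above implement the same operation, one for the diagonal
   block (A) and one for the off-diagonal block (B): locate the column in a
   row kept sorted by column index, overwrite or add if it is present, and
   otherwise shift the tail of the row up and insert, growing the storage
   by CHUNKSIZE entries when the row is full.  The search itself, in
   simplified form (illustrative; rp points at the row's column indices):

      low = 0; high = nrow;
      while (high-low > 5) {
        t = (low+high)/2;
        if (rp[t] > col) high = t;
        else             low  = t;
      }

   i.e. a coarse binary search that hands over to the linear scan once the
   window is small.
*/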
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar value;
  int         ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int         cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth  roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  int         *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar *aa = a->a;
  PetscTruth  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat         B = aij->B;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  int         *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar *ba = b->a;

  int         *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  int         nonew = a->nonew,shift = a->indexshift;
  PetscScalar *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
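/*
   Caller-side sketch (illustrative): locally owned entries go directly into
   the diagonal (A) or off-diagonal (B) block via the macros above; entries
   for rows owned by another process are stashed and communicated during
   assembly.

      ierr = MatSetValues(mat,1,&i,1,&j,&value,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Mixing ADD_VALUES and INSERT_VALUES between assemblies is an error; see
   the consistency check in MatAssemblyBegin_MPIAIJ() below.
*/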
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int        cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,nstash,reallocs;
  InsertMode addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}
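/*
   The MPI_BOR reduction above is a compact consistency check.  Assuming the
   usual InsertMode encoding (NOT_SET_VALUES = 0, INSERT_VALUES = 1,
   ADD_VALUES = 2), the bitwise OR over all ranks equals
   (ADD_VALUES|INSERT_VALUES) exactly when at least one rank inserted while
   another added, e.g.

      rank 0: INSERT_VALUES  (1)
      rank 1: ADD_VALUES     (2)     ->  1 | 2 == 3, flagged as an error
      rank 2: NOT_SET_VALUES (0)

   A rank that set no values contributes 0 and does not affect the result,
   which is why mat->insertmode is refreshed from the reduction afterwards.
*/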
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)aij->A->data,*b = (Mat_SeqAIJ *)aij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;
#if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_MUMPS)
  PetscTruth  flag;
#endif

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

#if defined(PETSC_HAVE_SUPERLUDIST)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
#endif

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); }
#endif

#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseMUMPS_MPIAIJ(mat);CHKERRQ(ierr); }
#endif
  PetscFunctionReturn(0);
}
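/*
   The MPI_PROD reduction above is a cheap way of asking "is every process
   still assembled?": was_assembled is 0 or 1 on each rank, so the product
   over all ranks is 1 only if nobody has disassembled.  For example,

      rank 0: was_assembled = 1
      rank 1: was_assembled = 0   ->  product 0, rank 0 must disassemble too

   The check is skipped entirely when new nonzeros are forbidden in B,
   since then no process can have disassembled.
*/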
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
{
  Mat_MPIAIJ  *l = (Mat_MPIAIJ*)A->data;
  int         i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int         *nprocs,j,idx,nsends,row;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;
  PetscTruth  found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in yy until the next line, which is true for this
     implementation but may not always be true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
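/*
   The multiply routines above share one communication pattern: the scatter
   of the ghost values of x is started, the multiply with the diagonal block
   A proceeds (it needs only local values), the scatter is completed, and
   the off-diagonal block B is applied to the gathered ghost values.
   Schematically, for y = M*x with M distributed by rows,

      y_local = A*x_local + B*x_ghost

   so the communication (VecScatterBegin/End) is overlapped with the
   A*x_local computation.  The transpose routines run the same pattern in
   reverse, scattering B^T*x_local back to its owners with ADD_VALUES.
*/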
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsSymmetric_MPIAIJ"
int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f)
{
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat        A = Aij->A, B,Aoff = Aij->B,Boff;
  MatType    type;
  IS         Me,Notme;
  int        M,N,first,last,*notme,i,ierr;

  PetscFunctionBegin;

  /* Compatible types */
  ierr = MatGetType(Bmat,&type);CHKERRQ(ierr);
  ierr = PetscStrcmp(type,MATMPIAIJ,f);CHKERRQ(ierr);
  if (!*f) SETERRQ(1,"Second matrix needs to be MPIAIJ too");

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *) Bmat->data; B = Bij->B;
  ierr = MatIsSymmetric(A,B,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(int),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrix(B,Notme,Me,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Boff);CHKERRQ(ierr);
  ierr = MatIsSymmetric(Aoff,Boff,f);CHKERRQ(ierr);
  ierr = MatDestroy(Boff);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which is true for this
     implementation but may not always be true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
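/*
   Note the argument order of this generation of the API: the scaling factor
   is passed by pointer, before the matrix.  A usage sketch (illustrative):

      PetscScalar alpha = 2.0;
      ierr = MatScale(&alpha,mat);CHKERRQ(ierr);

   Scaling both the diagonal and off-diagonal blocks multiplies every stored
   entry of the global matrix by alpha.
*/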
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);
extern int MatFactorInfo_Spooles(Mat,PetscViewer);
extern int MatFactorInfo_MUMPS(Mat,PetscViewer);

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ  *B = (Mat_SeqAIJ*)aij->B->data;
  int         nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
  int         nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (rank == 0) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);
  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
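/*
   The binary format written above is: a four-int header (cookie, M, N,
   global nz), then all row lengths, then all column indices, then all
   values, each gathered to and written by process 0.  A usage sketch
   (illustrative; assumes the PetscViewerBinaryOpen() call of this PETSc
   generation):

      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(mat->comm,"matrix.dat",PETSC_BINARY_CREATE,&viewer);CHKERRQ(ierr);
      ierr = MatView(mat,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/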
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *C = (Mat_SeqAIJ*)aij->A->data;
  int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,isascii,flg,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
#if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE)
      ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE)
      ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS) && !defined(PETSC_USE_SINGLE)
      ierr = MatFactorInfo_MUMPS(mat,viewer);CHKERRQ(ierr);
#endif
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (isascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
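/*
   Usage sketch (illustrative): the single MatView() entry point serves all
   supported viewers; for an ASCII viewer the format selects between the
   info listings handled above, e.g. once the format has been set to
   PETSC_VIEWER_ASCII_INFO_DETAIL (assuming the PetscViewerSetFormat() call
   of this PETSc generation):

      ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
      ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/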
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  int         ierr;
  Vec         bb1;
  PetscScalar mone=-1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
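/*
   The pattern in each branch above: gather the ghost values of the current
   iterate x, fold the off-processor coupling into the right-hand side,

      bb1 = bb - B*x_ghost

   (implemented as scaling lvec by -1 and calling B's multadd), and then run
   lits local sweeps of the requested SOR variant on the diagonal block A
   with bb1 as right-hand side.  This is processor-block SOR: exact SOR
   within each process, Jacobi-like coupling between processes.
*/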
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Mat        A = mat->A,B = mat->B;
  int        ierr;
  PetscReal  isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}
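/*
   Usage sketch (illustrative): options that affect the parallel layer are
   handled above, everything else is forwarded to both sequential blocks.
   For example, to avoid all stashing of off-processor entries when the
   caller guarantees it only sets locally owned rows:

      ierr = MatSetOption(mat,MAT_IGNORE_OFF_PROC_ENTRIES);CHKERRQ(ierr);
*/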
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int         i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int         nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int         *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int        max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
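/*
   Usage sketch (illustrative): rows may only be requested on the process
   that owns them, and each MatGetRow() must be paired with a MatRestoreRow()
   before the next row is requested.

      int         row,ncols,*cols,rstart,rend;
      PetscScalar *vals;

      ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
        ...  use cols[0..ncols-1] and vals[0..ncols-1] here ...
        ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
      }
*/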
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  int         ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
  PetscReal   sum = 0.0;
  PetscScalar *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      int       *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v   = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ  *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ  *Aloc = (Mat_SeqAIJ*)a->A->data;
  int         ierr,shift = Aloc->indexshift;
  int         M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat         B;
  PetscScalar *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
  ct   = cols;
  for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
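/*
   Usage sketch (illustrative): the transpose is formed by inserting each
   local row of A as a column of a new matrix B.  Passing PETSC_NULL as the
   output replaces A by its transpose in place, which the check above only
   permits for square matrices:

      Mat At;
      ierr = MatTranspose(A,&At);CHKERRQ(ierr);
      ierr = MatTranspose(A,PETSC_NULL);CHKERRQ(ierr);
*/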
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        a = aij->A,b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIAIJ"
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat        a,b,c,d;
  PetscTruth flg;
  int        ierr;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
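/*
   Two MPIAIJ matrices are equal exactly when, on every process, both the
   diagonal and off-diagonal blocks agree; the MPI_LAND reduction above
   combines the local answers, e.g.

      rank 0: flg = PETSC_TRUE
      rank 1: flg = PETSC_FALSE   ->  *flag == PETSC_FALSE on every rank

   so all processes receive the same collective result.
*/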
If need be, we can provide a more 1541 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1542 then copying the submatrices */ 1543 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1544 } else { 1545 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1546 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1547 } 1548 PetscFunctionReturn(0); 1549 } 1550 1551 #undef __FUNCT__ 1552 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1553 int MatSetUpPreallocation_MPIAIJ(Mat A) 1554 { 1555 int ierr; 1556 1557 PetscFunctionBegin; 1558 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1559 PetscFunctionReturn(0); 1560 } 1561 1562 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1563 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1564 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1565 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **); 1566 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1567 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1568 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*); 1569 #endif 1570 1571 #include "petscblaslapack.h" 1572 extern int MatAXPY_SeqAIJ(PetscScalar *,Mat,Mat,MatStructure); 1573 #undef __FUNCT__ 1574 #define __FUNCT__ "MatAXPY_MPIAIJ" 1575 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1576 { 1577 int ierr,one=1,i; 1578 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1579 Mat_SeqAIJ *x,*y; 1580 1581 PetscFunctionBegin; 1582 if (str == SAME_NONZERO_PATTERN) { 1583 x = (Mat_SeqAIJ *)xx->A->data; 1584 y = (Mat_SeqAIJ *)yy->A->data; 1585 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1586 x = (Mat_SeqAIJ *)xx->B->data; 1587 y = (Mat_SeqAIJ *)yy->B->data; 1588 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1589 } else if (str == SUBSET_NONZERO_PATTERN) { 1590 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1591 1592 x = (Mat_SeqAIJ *)xx->B->data; 1593 y = (Mat_SeqAIJ *)yy->B->data; 1594 if (y->xtoy && y->XtoY != xx->B) { 1595 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1596 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1597 } 1598 if (!y->xtoy) { /* get xtoy */ 1599 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1600 y->XtoY = xx->B; 1601 } 1602 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1603 } else { 1604 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1605 } 1606 PetscFunctionReturn(0); 1607 } 1608 1609 /* -------------------------------------------------------------------*/ 1610 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1611 MatGetRow_MPIAIJ, 1612 MatRestoreRow_MPIAIJ, 1613 MatMult_MPIAIJ, 1614 MatMultAdd_MPIAIJ, 1615 MatMultTranspose_MPIAIJ, 1616 MatMultTransposeAdd_MPIAIJ, 1617 0, 1618 0, 1619 0, 1620 0, 1621 0, 1622 0, 1623 MatRelax_MPIAIJ, 1624 MatTranspose_MPIAIJ, 1625 MatGetInfo_MPIAIJ, 1626 MatEqual_MPIAIJ, 1627 MatGetDiagonal_MPIAIJ, 1628 MatDiagonalScale_MPIAIJ, 1629 MatNorm_MPIAIJ, 1630 MatAssemblyBegin_MPIAIJ, 1631 MatAssemblyEnd_MPIAIJ, 1632 0, 1633 MatSetOption_MPIAIJ, 1634 MatZeroEntries_MPIAIJ, 1635 MatZeroRows_MPIAIJ, 1636 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1637 MatLUFactorSymbolic_MPIAIJ_TFS, 1638 #else 1639 0, 1640 #endif 1641 0, 1642 0, 1643 0, 1644 MatSetUpPreallocation_MPIAIJ, 1645 0, 1646 0, 1647 0, 1648 0, 1649 MatDuplicate_MPIAIJ, 1650 0, 1651 0, 1652 0, 1653 0, 1654 MatAXPY_MPIAIJ, 1655 MatGetSubMatrices_MPIAIJ, 1656 
MatIncreaseOverlap_MPIAIJ, 1657 MatGetValues_MPIAIJ, 1658 MatCopy_MPIAIJ, 1659 MatPrintHelp_MPIAIJ, 1660 MatScale_MPIAIJ, 1661 0, 1662 0, 1663 0, 1664 MatGetBlockSize_MPIAIJ, 1665 0, 1666 0, 1667 0, 1668 0, 1669 MatFDColoringCreate_MPIAIJ, 1670 0, 1671 MatSetUnfactored_MPIAIJ, 1672 0, 1673 0, 1674 MatGetSubMatrix_MPIAIJ, 1675 MatDestroy_MPIAIJ, 1676 MatView_MPIAIJ, 1677 MatGetPetscMaps_Petsc, 1678 0, 1679 0, 1680 0, 1681 0, 1682 0, 1683 0, 1684 0, 1685 0, 1686 MatSetColoring_MPIAIJ, 1687 MatSetValuesAdic_MPIAIJ, 1688 MatSetValuesAdifor_MPIAIJ 1689 }; 1690 1691 /* ----------------------------------------------------------------------------------------*/ 1692 1693 EXTERN_C_BEGIN 1694 #undef __FUNCT__ 1695 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1696 int MatStoreValues_MPIAIJ(Mat mat) 1697 { 1698 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1699 int ierr; 1700 1701 PetscFunctionBegin; 1702 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1703 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1704 PetscFunctionReturn(0); 1705 } 1706 EXTERN_C_END 1707 1708 EXTERN_C_BEGIN 1709 #undef __FUNCT__ 1710 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1711 int MatRetrieveValues_MPIAIJ(Mat mat) 1712 { 1713 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1714 int ierr; 1715 1716 PetscFunctionBegin; 1717 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1718 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1719 PetscFunctionReturn(0); 1720 } 1721 EXTERN_C_END 1722 1723 #include "petscpc.h" 1724 EXTERN_C_BEGIN 1725 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1726 EXTERN_C_END 1727 1728 EXTERN_C_BEGIN 1729 #undef __FUNCT__ 1730 #define __FUNCT__ "MatCreate_MPIAIJ" 1731 int MatCreate_MPIAIJ(Mat B) 1732 { 1733 Mat_MPIAIJ *b; 1734 int ierr,i,size; 1735 1736 PetscFunctionBegin; 1737 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1738 1739 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1740 B->data = (void*)b; 1741 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1742 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1743 B->factor = 0; 1744 B->assembled = PETSC_FALSE; 1745 B->mapping = 0; 1746 1747 B->insertmode = NOT_SET_VALUES; 1748 b->size = size; 1749 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1750 1751 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1752 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1753 1754 /* the information in the maps duplicates the information computed below, eventually 1755 we should remove the duplicate information that is not contained in the maps */ 1756 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1757 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1758 1759 /* build local table of row and column ownerships */ 1760 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1761 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1762 b->cowners = b->rowners + b->size + 2; 1763 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1764 b->rowners[0] = 0; 1765 for (i=2; i<=b->size; i++) { 1766 b->rowners[i] += b->rowners[i-1]; 1767 } 1768 b->rstart = b->rowners[b->rank]; 1769 b->rend = b->rowners[b->rank+1]; 1770 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1771 b->cowners[0] = 0; 1772 for (i=2; i<=b->size; i++) { 1773 b->cowners[i] += b->cowners[i-1]; 1774 } 1775 b->cstart = b->cowners[b->rank]; 1776 b->cend = 
b->cowners[b->rank+1]; 1777 1778 /* build cache for off array entries formed */ 1779 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1780 b->donotstash = PETSC_FALSE; 1781 b->colmap = 0; 1782 b->garray = 0; 1783 b->roworiented = PETSC_TRUE; 1784 1785 /* stuff used for matrix vector multiply */ 1786 b->lvec = PETSC_NULL; 1787 b->Mvctx = PETSC_NULL; 1788 1789 /* stuff for MatGetRow() */ 1790 b->rowindices = 0; 1791 b->rowvalues = 0; 1792 b->getrowactive = PETSC_FALSE; 1793 1794 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1795 "MatStoreValues_MPIAIJ", 1796 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1797 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1798 "MatRetrieveValues_MPIAIJ", 1799 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1800 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1801 "MatGetDiagonalBlock_MPIAIJ", 1802 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1803 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C", 1804 "MatIsSymmetric_MPIAIJ", 1805 MatIsSymmetric_MPIAIJ); CHKERRQ(ierr); 1806 PetscFunctionReturn(0); 1807 } 1808 EXTERN_C_END 1809 1810 #undef __FUNCT__ 1811 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1812 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1813 { 1814 Mat mat; 1815 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1816 int ierr; 1817 1818 PetscFunctionBegin; 1819 *newmat = 0; 1820 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1821 ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1822 a = (Mat_MPIAIJ*)mat->data; 1823 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1824 mat->factor = matin->factor; 1825 mat->assembled = PETSC_TRUE; 1826 mat->insertmode = NOT_SET_VALUES; 1827 mat->preallocated = PETSC_TRUE; 1828 1829 a->rstart = oldmat->rstart; 1830 a->rend = oldmat->rend; 1831 a->cstart = oldmat->cstart; 1832 a->cend = oldmat->cend; 1833 a->size = oldmat->size; 1834 a->rank = oldmat->rank; 1835 a->donotstash = oldmat->donotstash; 1836 a->roworiented = oldmat->roworiented; 1837 a->rowindices = 0; 1838 a->rowvalues = 0; 1839 a->getrowactive = PETSC_FALSE; 1840 1841 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1842 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1843 if (oldmat->colmap) { 1844 #if defined (PETSC_USE_CTABLE) 1845 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1846 #else 1847 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1848 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1849 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1850 #endif 1851 } else a->colmap = 0; 1852 if (oldmat->garray) { 1853 int len; 1854 len = oldmat->B->n; 1855 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1856 PetscLogObjectMemory(mat,len*sizeof(int)); 1857 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1858 } else a->garray = 0; 1859 1860 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1861 PetscLogObjectParent(mat,a->lvec); 1862 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1863 PetscLogObjectParent(mat,a->Mvctx); 1864 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1865 PetscLogObjectParent(mat,a->A); 1866 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1867 PetscLogObjectParent(mat,a->B); 1868 ierr 
= PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1869 *newmat = mat;
1870 PetscFunctionReturn(0);
1871 }
1872
1873 #include "petscsys.h"
1874
1875 EXTERN_C_BEGIN
1876 #undef __FUNCT__
1877 #define __FUNCT__ "MatLoad_MPIAIJ"
1878 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1879 {
1880 Mat A;
1881 PetscScalar *vals,*svals;
1882 MPI_Comm comm = ((PetscObject)viewer)->comm;
1883 MPI_Status status;
1884 int i,nz,ierr,j,rstart,rend,fd;
1885 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1886 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1887 int tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1888 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_MUMPS)
1889 PetscTruth flag;
1890 #endif
1891
1892 PetscFunctionBegin;
1893 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1894 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1895 if (!rank) {
1896 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1897 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1898 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1899 if (header[3] < 0) {
1900 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1901 }
1902 }
1903
1904 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1905 M = header[1]; N = header[2];
1906 /* determine ownership of all rows */
1907 m = M/size + ((M % size) > rank);
1908 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1909 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1910 rowners[0] = 0;
1911 for (i=2; i<=size; i++) {
1912 rowners[i] += rowners[i-1];
1913 }
1914 rstart = rowners[rank];
1915 rend = rowners[rank+1];
1916
1917 /* distribute row lengths to all processors */
1918 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1919 offlens = ourlens + (rend-rstart);
1920 if (!rank) {
1921 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1922 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1923 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1924 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1925 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1926 ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1927 } else {
1928 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1929 }
1930
1931 if (!rank) {
1932 /* calculate the number of nonzeros on each processor */
1933 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1934 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1935 for (i=0; i<size; i++) {
1936 for (j=rowners[i]; j<rowners[i+1]; j++) {
1937 procsnz[i] += rowlengths[j];
1938 }
1939 }
1940 ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1941
1942 /* determine max buffer needed and allocate it */
1943 maxnz = 0;
1944 for (i=0; i<size; i++) {
1945 maxnz = PetscMax(maxnz,procsnz[i]);
1946 }
1947 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1948
1949 /* read in my part of the matrix column indices */
1950 nz = procsnz[0];
1951 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1952 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1953
1954 /* read in everyone else's part and ship it off */
1955 for (i=1; i<size; i++) {
1956 nz = procsnz[i];
1957 ierr =
PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1958 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1959 }
1960 ierr = PetscFree(cols);CHKERRQ(ierr);
1961 } else {
1962 /* determine buffer space needed for message */
1963 nz = 0;
1964 for (i=0; i<m; i++) {
1965 nz += ourlens[i];
1966 }
1967 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1968
1969 /* receive message of column indices */
1970 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1971 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1972 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1973 }
1974
1975 /* determine column ownership if matrix is not square */
1976 if (N != M) {
1977 n = N/size + ((N % size) > rank);
1978 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1979 cstart = cend - n;
1980 } else {
1981 cstart = rstart;
1982 cend = rend;
1983 n = cend - cstart;
1984 }
1985
1986 /* loop over local rows, determining number of off-diagonal entries */
1987 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1988 jj = 0;
1989 for (i=0; i<m; i++) {
1990 for (j=0; j<ourlens[i]; j++) {
1991 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1992 jj++;
1993 }
1994 }
1995
1996 /* create our matrix */
1997 for (i=0; i<m; i++) {
1998 ourlens[i] -= offlens[i];
1999 }
2000 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
2001 A = *newmat;
2002 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2003 for (i=0; i<m; i++) {
2004 ourlens[i] += offlens[i];
2005 }
2006
2007 if (!rank) {
2008 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2009
2010 /* read in my part of the matrix numerical values */
2011 nz = procsnz[0];
2012 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2013
2014 /* insert into matrix */
2015 jj = rstart;
2016 smycols = mycols;
2017 svals = vals;
2018 for (i=0; i<m; i++) {
2019 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2020 smycols += ourlens[i];
2021 svals += ourlens[i];
2022 jj++;
2023 }
2024
2025 /* read in the other processors' parts and ship them out */
2026 for (i=1; i<size; i++) {
2027 nz = procsnz[i];
2028 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2029 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2030 }
2031 ierr = PetscFree(procsnz);CHKERRQ(ierr);
2032 } else {
2033 /* receive numeric values */
2034 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2035
2036 /* receive message of values */
2037 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2038 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2039 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2040
2041 /* insert into matrix */
2042 jj = rstart;
2043 smycols = mycols;
2044 svals = vals;
2045 for (i=0; i<m; i++) {
2046 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2047 smycols += ourlens[i];
2048 svals += ourlens[i];
2049 jj++;
2050 }
2051 }
2052 ierr = PetscFree(ourlens);CHKERRQ(ierr);
2053 ierr = PetscFree(vals);CHKERRQ(ierr);
2054 ierr = PetscFree(mycols);CHKERRQ(ierr);
2055 ierr = PetscFree(rowners);CHKERRQ(ierr);
2056
2057 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2058 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2059 #if defined(PETSC_HAVE_SPOOLES)
2060 ierr = PetscOptionsHasName(A->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
2061
if (flag) {
2062 if (size == 1) {
2063 ierr = MatUseSpooles_SeqAIJ(A);CHKERRQ(ierr);
2064 } else {
2065 ierr = MatUseSpooles_MPIAIJ(A);CHKERRQ(ierr);
2066 }
2067 }
2068 #endif
2069 #if defined(PETSC_HAVE_SUPERLUDIST)
2070 ierr = PetscOptionsHasName(A->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
2071 if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr); }
2072 #endif
2073 #if defined(PETSC_HAVE_MUMPS)
2074 ierr = PetscOptionsHasName(A->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr);
2075 if (flag) { ierr = MatUseMUMPS_MPIAIJ(A);CHKERRQ(ierr); }
2076 #endif
2077 PetscFunctionReturn(0);
2078 }
2079 EXTERN_C_END
2080
2081 #undef __FUNCT__
2082 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2083 /*
2084 Not great since it makes two copies of the submatrix: first a SeqAIJ matrix
2085 on each process, and then the end result formed by concatenating the local matrices.
2086 Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2087 */
2088 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2089 {
2090 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2091 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2092 Mat *local,M,Mreuse;
2093 PetscScalar *vwork,*aa;
2094 MPI_Comm comm = mat->comm;
2095 Mat_SeqAIJ *aij;
2096
2097
2098 PetscFunctionBegin;
2099 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2100 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2101
2102 if (call == MAT_REUSE_MATRIX) {
2103 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2104 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2105 local = &Mreuse;
2106 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2107 } else {
2108 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2109 Mreuse = *local;
2110 ierr = PetscFree(local);CHKERRQ(ierr);
2111 }
2112
2113 /*
2114 m - number of local rows
2115 n - number of columns (same on all processors)
2116 rstart - first row in new global matrix generated
2117 */
2118 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2119 if (call == MAT_INITIAL_MATRIX) {
2120 aij = (Mat_SeqAIJ*)(Mreuse)->data;
2121 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
2122 ii = aij->i;
2123 jj = aij->j;
2124
2125 /*
2126 Determine the number of non-zeros in the diagonal and off-diagonal
2127 portions of the matrix in order to do correct preallocation
2128 */
2129
2130 /* first get start and end of "diagonal" columns */
2131 if (csize == PETSC_DECIDE) {
2132 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2133 if (mglobal == n) { /* square matrix */
2134 nlocal = m;
2135 } else {
2136 nlocal = n/size + ((n % size) > rank);
2137 }
2138 } else {
2139 nlocal = csize;
2140 }
2141 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2142 rstart = rend - nlocal;
2143 if (rank == size - 1 && rend != n) {
2144 SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2145 }
2146
2147 /* next, compute all the lengths */
2148 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2149 olens = dlens + m;
2150 for (i=0; i<m; i++) {
2151 jend = ii[i+1] - ii[i];
2152 olen = 0;
2153 dlen = 0;
2154 for (j=0; j<jend; j++) {
2155 if (*jj < rstart || *jj >= rend) olen++;
2156 else dlen++;
2157 jj++;
2158 }
2159 olens[i] = olen;
2160 dlens[i] = dlen;
2161 }
2162 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2163
ierr = PetscFree(dlens);CHKERRQ(ierr);
2164 } else {
2165 int ml,nl;
2166
2167 M = *newmat;
2168 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2169 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2170 ierr = MatZeroEntries(M);CHKERRQ(ierr);
2171 /*
2172 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2173 rather than the slower MatSetValues().
2174 */
2175 M->was_assembled = PETSC_TRUE;
2176 M->assembled = PETSC_FALSE;
2177 }
2178 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2179 aij = (Mat_SeqAIJ*)(Mreuse)->data;
2180 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
2181 ii = aij->i;
2182 jj = aij->j;
2183 aa = aij->a;
2184 for (i=0; i<m; i++) {
2185 row = rstart + i;
2186 nz = ii[i+1] - ii[i];
2187 cwork = jj; jj += nz;
2188 vwork = aa; aa += nz;
2189 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2190 }
2191
2192 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2193 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2194 *newmat = M;
2195
2196 /* save submatrix used in processor for next request */
2197 if (call == MAT_INITIAL_MATRIX) {
2198 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2199 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2200 }
2201
2202 PetscFunctionReturn(0);
2203 }
2204
2205 #undef __FUNCT__
2206 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2207 /*@C
2208 MatMPIAIJSetPreallocation - Preallocates storage for a sparse parallel matrix in
2209 AIJ format (the default parallel PETSc format). For good matrix assembly performance
2210 the user should preallocate the matrix storage by setting the parameters
2211 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2212 performance can be increased by more than a factor of 50.
2213
2214 Collective on MPI_Comm
2215
2216 Input Parameters:
2217 + A - the matrix
2218 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
2219 (same value is used for all local rows)
2220 . d_nnz - array containing the number of nonzeros in the various rows of the
2221 DIAGONAL portion of the local submatrix (possibly different for each row)
2222 or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2223 The size of this array is equal to the number of local rows, i.e., 'm'.
2224 You must leave room for the diagonal entry even if it is zero.
2225 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
2226 submatrix (same value is used for all local rows).
2227 - o_nnz - array containing the number of nonzeros in the various rows of the
2228 OFF-DIAGONAL portion of the local submatrix (possibly different for
2229 each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2230 structure. The size of this array is equal to the number
2231 of local rows, i.e., 'm'.
2232
2233 The AIJ format (also called the Yale sparse matrix format or
2234 compressed row storage) is fully compatible with standard Fortran 77
2235 storage. That is, the stored row and column indices can begin at
2236 either one (as in Fortran) or zero. See the users manual for details.
2237
2238 The user MUST specify either the local or global matrix dimensions
2239 (possibly both).
2240
2241 The parallel matrix is partitioned such that the first m0 rows belong to
2242 process 0, the next m1 rows belong to process 1, the next m2 rows belong
2243 to process 2 etc., where m0,m1,m2...
are the input parameter 'm'.
2244
2245 The DIAGONAL portion of the local submatrix of a processor can be defined
2246 as the submatrix which is obtained by extracting the part corresponding
2247 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2248 first row that belongs to the processor, and r2 is the last row belonging
2249 to this processor. This is a square mxm matrix. The remaining portion
2250 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2251
2252 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
2253
2254 By default, this format uses inodes (identical nodes) when possible.
2255 We search for consecutive rows with the same nonzero structure, thereby
2256 reusing matrix information to achieve increased efficiency.
2257
2258 Options Database Keys:
2259 + -mat_aij_no_inode - Do not use inodes
2260 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2261 - -mat_aij_oneindex - Internally use indexing starting at 1
2262 rather than 0. Note that when calling MatSetValues(),
2263 the user still MUST index entries starting at 0!
2264
2265 Example usage:
2266
2267 Consider the following 8x8 matrix with 34 non-zero values, that is
2268 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2269 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2270 as follows:
2271
2272 .vb
2273 1 2 0 | 0 3 0 | 0 4
2274 Proc0 0 5 6 | 7 0 0 | 8 0
2275 9 0 10 | 11 0 0 | 12 0
2276 -------------------------------------
2277 13 0 14 | 15 16 17 | 0 0
2278 Proc1 0 18 0 | 19 20 21 | 0 0
2279 0 0 0 | 22 23 0 | 24 0
2280 -------------------------------------
2281 Proc2 25 26 27 | 0 0 28 | 29 0
2282 30 0 0 | 31 32 33 | 0 34
2283 .ve
2284
2285 This can be represented as a collection of submatrices as:
2286
2287 .vb
2288 A B C
2289 D E F
2290 G H I
2291 .ve
2292
2293 Where the submatrices A,B,C are owned by proc0, D,E,F are
2294 owned by proc1, G,H,I are owned by proc2.
2295
2296 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2297 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2298 The 'M','N' parameters are 8,8, and have the same values on all procs.
2299
2300 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2301 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2302 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2303 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2304 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2305 matrix, and [DF] as another SeqAIJ matrix.
2306
2307 When d_nz, o_nz parameters are specified, d_nz storage elements are
2308 allocated for every row of the local diagonal submatrix, and o_nz
2309 storage locations are allocated for every row of the OFF-DIAGONAL submat.
2310 One way to choose d_nz and o_nz is to use the maximum number of nonzeros
2311 per local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2312 In this case, the values of d_nz,o_nz are:
2313 .vb
2314 proc0 : d_nz = 2, o_nz = 2
2315 proc1 : d_nz = 3, o_nz = 2
2316 proc2 : d_nz = 1, o_nz = 4
2317 .ve
2318 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2319 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2320 for proc2, i.e., we are using 12+15+10=37 storage locations to store
2321 34 values.
2322
2323 When d_nnz, o_nnz parameters are specified, the storage is specified
2324 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2325 In the above case the values for d_nnz,o_nnz are:
2326 .vb
2327 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2328 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2329 proc2: d_nnz = [1,1] and o_nnz = [4,4]
2330 .ve
2331 Here the space allocated is the sum of all the above values, i.e., 34, and
2332 hence the pre-allocation is perfect.
2333
2334 Level: intermediate
2335
2336 .keywords: matrix, aij, compressed row, sparse, parallel
2337
2338 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2339 @*/
2340 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2341 {
2342 Mat_MPIAIJ *b;
2343 int ierr,i;
2344 PetscTruth flg2;
2345
2346 PetscFunctionBegin;
2347 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2348 if (!flg2) PetscFunctionReturn(0);
2349 B->preallocated = PETSC_TRUE;
2350 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2351 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2352 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2353 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2354 if (d_nnz) {
2355 for (i=0; i<B->m; i++) {
2356 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2357 }
2358 }
2359 if (o_nnz) {
2360 for (i=0; i<B->m; i++) {
2361 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2362 }
2363 }
2364 b = (Mat_MPIAIJ*)B->data;
2365
2366 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2367 PetscLogObjectParent(B,b->A);
2368 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2369 PetscLogObjectParent(B,b->B);
2370
2371 PetscFunctionReturn(0);
2372 }
2373
2374 #undef __FUNCT__
2375 #define __FUNCT__ "MatCreateMPIAIJ"
2376 /*@C
2377 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2378 (the default parallel PETSc format). For good matrix assembly performance
2379 the user should preallocate the matrix storage by setting the parameters
2380 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2381 performance can be increased by more than a factor of 50.
2382
2383 Collective on MPI_Comm
2384
2385 Input Parameters:
2386 + comm - MPI communicator
2387 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2388 This value should be the same as the local size used in creating the
2389 y vector for the matrix-vector product y = Ax.
2390 . n - This value should be the same as the local size used in creating the
2391 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2392 it calculated if N is given) For square matrices n is almost always m.
2393 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2394 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2395 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
2396 (same value is used for all local rows)
2397 . d_nnz - array containing the number of nonzeros in the various rows of the
2398 DIAGONAL portion of the local submatrix (possibly different for each row)
2399 or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2400 The size of this array is equal to the number of local rows, i.e., 'm'.
2401 You must leave room for the diagonal entry even if it is zero.
2402 .
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
2403 submatrix (same value is used for all local rows).
2404 - o_nnz - array containing the number of nonzeros in the various rows of the
2405 OFF-DIAGONAL portion of the local submatrix (possibly different for
2406 each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2407 structure. The size of this array is equal to the number
2408 of local rows, i.e., 'm'.
2409
2410 Output Parameter:
2411 . A - the matrix
2412
2413 Notes:
2414 m,n,M,N parameters specify the size of the matrix, and its partitioning across
2415 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2416 storage requirements for this matrix.
2417
2418 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2419 processor then it must be used on all processors that share the object for
2420 that argument.
2421
2422 The AIJ format (also called the Yale sparse matrix format or
2423 compressed row storage) is fully compatible with standard Fortran 77
2424 storage. That is, the stored row and column indices can begin at
2425 either one (as in Fortran) or zero. See the users manual for details.
2426
2427 The user MUST specify either the local or global matrix dimensions
2428 (possibly both).
2429
2430 The parallel matrix is partitioned such that the first m0 rows belong to
2431 process 0, the next m1 rows belong to process 1, the next m2 rows belong
2432 to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
2433
2434 The DIAGONAL portion of the local submatrix of a processor can be defined
2435 as the submatrix which is obtained by extracting the part corresponding
2436 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2437 first row that belongs to the processor, and r2 is the last row belonging
2438 to this processor. This is a square mxm matrix. The remaining portion
2439 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2440
2441 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
2442
2443 By default, this format uses inodes (identical nodes) when possible.
2444 We search for consecutive rows with the same nonzero structure, thereby
2445 reusing matrix information to achieve increased efficiency.
2446
2447 Options Database Keys:
2448 + -mat_aij_no_inode - Do not use inodes
2449 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2450 - -mat_aij_oneindex - Internally use indexing starting at 1
2451 rather than 0. Note that when calling MatSetValues(),
2452 the user still MUST index entries starting at 0!
2453
2454
2455 Example usage:
2456
2457 Consider the following 8x8 matrix with 34 non-zero values, that is
2458 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2459 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2460 as follows:
2461
2462 .vb
2463 1 2 0 | 0 3 0 | 0 4
2464 Proc0 0 5 6 | 7 0 0 | 8 0
2465 9 0 10 | 11 0 0 | 12 0
2466 -------------------------------------
2467 13 0 14 | 15 16 17 | 0 0
2468 Proc1 0 18 0 | 19 20 21 | 0 0
2469 0 0 0 | 22 23 0 | 24 0
2470 -------------------------------------
2471 Proc2 25 26 27 | 0 0 28 | 29 0
2472 30 0 0 | 31 32 33 | 0 34
2473 .ve
2474
2475 This can be represented as a collection of submatrices as:
2476
2477 .vb
2478 A B C
2479 D E F
2480 G H I
2481 .ve
2482
2483 Where the submatrices A,B,C are owned by proc0, D,E,F are
2484 owned by proc1, G,H,I are owned by proc2.
2485
2486 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2487 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2488 The 'M','N' parameters are 8,8, and have the same values on all procs.
2489
2490 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2491 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2492 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2493 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2494 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2495 matrix, and [DF] as another SeqAIJ matrix.
2496
2497 When d_nz, o_nz parameters are specified, d_nz storage elements are
2498 allocated for every row of the local diagonal submatrix, and o_nz
2499 storage locations are allocated for every row of the OFF-DIAGONAL submat.
2500 One way to choose d_nz and o_nz is to use the maximum number of nonzeros
2501 per local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2502 In this case, the values of d_nz,o_nz are:
2503 .vb
2504 proc0 : d_nz = 2, o_nz = 2
2505 proc1 : d_nz = 3, o_nz = 2
2506 proc2 : d_nz = 1, o_nz = 4
2507 .ve
2508 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2509 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2510 for proc2, i.e., we are using 12+15+10=37 storage locations to store
2511 34 values.
2512
2513 When d_nnz, o_nnz parameters are specified, the storage is specified
2514 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2515 In the above case the values for d_nnz,o_nnz are:
2516 .vb
2517 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2518 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2519 proc2: d_nnz = [1,1] and o_nnz = [4,4]
2520 .ve
2521 Here the space allocated is the sum of all the above values, i.e., 34, and
2522 hence the pre-allocation is perfect.
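
   For illustration only, a minimal sketch of the corresponding call on proc0
   (each processor passes its own local sizes and nnz arrays; error checking
   and the MatSetValues()/assembly phase are elided):
.vb
      Mat A;
      int d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};  /* the proc0 values derived above */
      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve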
2523 2524 Level: intermediate 2525 2526 .keywords: matrix, aij, compressed row, sparse, parallel 2527 2528 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2529 @*/ 2530 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2531 { 2532 int ierr,size; 2533 2534 PetscFunctionBegin; 2535 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2536 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2537 if (size > 1) { 2538 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2539 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2540 } else { 2541 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2542 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2543 } 2544 PetscFunctionReturn(0); 2545 } 2546 2547 #undef __FUNCT__ 2548 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2549 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap) 2550 { 2551 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2552 PetscFunctionBegin; 2553 *Ad = a->A; 2554 *Ao = a->B; 2555 *colmap = a->garray; 2556 PetscFunctionReturn(0); 2557 } 2558 2559 #undef __FUNCT__ 2560 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2561 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2562 { 2563 int ierr,i; 2564 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2565 2566 PetscFunctionBegin; 2567 if (coloring->ctype == IS_COLORING_LOCAL) { 2568 ISColoringValue *allcolors,*colors; 2569 ISColoring ocoloring; 2570 2571 /* set coloring for diagonal portion */ 2572 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2573 2574 /* set coloring for off-diagonal portion */ 2575 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2576 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2577 for (i=0; i<a->B->n; i++) { 2578 colors[i] = allcolors[a->garray[i]]; 2579 } 2580 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2581 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2582 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2583 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2584 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2585 ISColoringValue *colors; 2586 int *larray; 2587 ISColoring ocoloring; 2588 2589 /* set coloring for diagonal portion */ 2590 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2591 for (i=0; i<a->A->n; i++) { 2592 larray[i] = i + a->cstart; 2593 } 2594 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2595 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2596 for (i=0; i<a->A->n; i++) { 2597 colors[i] = coloring->colors[larray[i]]; 2598 } 2599 ierr = PetscFree(larray);CHKERRQ(ierr); 2600 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2601 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2602 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2603 2604 /* set coloring for off-diagonal portion */ 2605 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2606 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2607 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2608 for (i=0; i<a->B->n; i++) { 2609 colors[i] = coloring->colors[larray[i]]; 2610 } 2611 ierr = PetscFree(larray);CHKERRQ(ierr); 2612 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2613 ierr = 
MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2614 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2615 } else {
2616 SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
2617 }
2618
2619 PetscFunctionReturn(0);
2620 }
2621
2622 #undef __FUNCT__
2623 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2624 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2625 {
2626 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2627 int ierr;
2628
2629 PetscFunctionBegin;
2630 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2631 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2632 PetscFunctionReturn(0);
2633 }
2634
2635 #undef __FUNCT__
2636 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2637 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2638 {
2639 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2640 int ierr;
2641
2642 PetscFunctionBegin;
2643 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2644 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2645 PetscFunctionReturn(0);
2646 }
2647
2648 #undef __FUNCT__
2649 #define __FUNCT__ "MatMerge"
2650 /*@C
2651 MatMerge - Creates a single large PETSc matrix by concatenating the sequential
2652 matrices from each processor
2653
2654 Collective on MPI_Comm
2655
2656 Input Parameters:
2657 + comm - the communicator the parallel matrix will live on
2658 - inmat - the input sequential matrix on each processor
2659
2660 Output Parameter:
2661 . outmat - the parallel matrix generated
2662
2663 Level: advanced
2664
2665 Notes: The number of columns of the input matrix on EACH of the separate
2666 processors MUST be the same. The input matrix is destroyed once the
2667 parallel matrix has been generated.
2668
2669 @*/
2670 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2671 {
2672 int ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2673 PetscScalar *values;
2674 PetscMap columnmap,rowmap;
2675
2676 PetscFunctionBegin;
2677
2678 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2679
2680 /* count nonzeros in each row, for the diagonal and off-diagonal portions of the matrix */
2681 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2682 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2683 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2684 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2685 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2686
2687 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2688 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2689 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2690 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2691 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2692
2693 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2694 for (i=0;i<m;i++) {
2695 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2696 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2697 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2698 }
2699 ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr);
2700 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2701
2702 for (i=0;i<m;i++) {
2703 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2704 I = i + rstart;
2705 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2706 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2707 }
2708 ierr = MatDestroy(inmat);CHKERRQ(ierr);
2709 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2710 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2711
2712 PetscFunctionReturn(0);
2713 }
2714
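/*
   A minimal usage sketch for MatMerge() (illustrative only: Aseq stands for
   an already assembled sequential matrix on each process, and the input
   matrix is destroyed by the call):

      Mat Aseq,Apar;
      ...
      ierr = MatMerge(PETSC_COMM_WORLD,Aseq,&Apar);CHKERRQ(ierr);
      ierr = MatDestroy(Apar);CHKERRQ(ierr);
*/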
#undef __FUNCT__
2715 #define __FUNCT__ "MatFileSplit"
/* Writes each processor's local rows of A, as a sequential matrix, to the binary file outfile.<rank> */
2716 int MatFileSplit(Mat A,char *outfile)
2717 {
2718 int ierr,rank,len,m,N,i,rstart,*indx,nnz;
2719 PetscViewer out;
2720 char *name;
2721 Mat B;
2722 PetscScalar *values;
2723
2724 PetscFunctionBegin;
2725
2726 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2727 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2728 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
2729 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2730 for (i=0;i<m;i++) {
2731 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2732 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2733 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2734 }
2735 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2736 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2737
2738 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2739 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2740 ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr); /* room for the ".<rank>" suffix */
2741 sprintf(name,"%s.%d",outfile,rank);
2742 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
2743 ierr = PetscFree(name);CHKERRQ(ierr);
2744 ierr = MatView(B,out);CHKERRQ(ierr);
2745 ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2746 ierr = MatDestroy(B);CHKERRQ(ierr);
2747 PetscFunctionReturn(0);
2748 }
2749
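/*
   A usage sketch for MatFileSplit() (illustrative: A is an assembled parallel
   matrix and "amat" is a hypothetical base file name; each process writes its
   local rows to the binary file amat.<rank>):

      ierr = MatFileSplit(A,"amat");CHKERRQ(ierr);
*/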