/*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
EXTERN int DisAssemble_MPIAIJ(Mat);
EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatPrintHelp_SeqAIJ(Mat);

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  When PETSC_USE_CTABLE is used this mapping is
   scalable, at a slightly higher hash-table lookup cost; without it the
   mapping is not scalable (each processor stores an order N integer array)
   but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if
(!a->singlemalloc) { \ 94 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 95 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 96 } \ 97 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 98 a->singlemalloc = PETSC_TRUE; \ 99 \ 100 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 101 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 102 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 103 a->maxnz += CHUNKSIZE; \ 104 a->reallocs++; \ 105 } \ 106 N = nrow++ - 1; a->nz++; \ 107 /* shift up all the later entries in this row */ \ 108 for (ii=N; ii>=_i; ii--) { \ 109 rp[ii+1] = rp[ii]; \ 110 ap[ii+1] = ap[ii]; \ 111 } \ 112 rp[_i] = col1; \ 113 ap[_i] = value; \ 114 a_noinsert: ; \ 115 ailen[row] = nrow; \ 116 } 117 118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 119 { \ 120 \ 121 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 122 rmax = bimax[row]; nrow = bilen[row]; \ 123 col1 = col - shift; \ 124 \ 125 low = 0; high = nrow; \ 126 while (high-low > 5) { \ 127 t = (low+high)/2; \ 128 if (rp[t] > col) high = t; \ 129 else low = t; \ 130 } \ 131 for (_i=low; _i<high; _i++) { \ 132 if (rp[_i] > col1) break; \ 133 if (rp[_i] == col1) { \ 134 if (addv == ADD_VALUES) ap[_i] += value; \ 135 else ap[_i] = value; \ 136 goto b_noinsert; \ 137 } \ 138 } \ 139 if (nonew == 1) goto b_noinsert; \ 140 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 141 if (nrow >= rmax) { \ 142 /* there is no extra room in row, therefore enlarge */ \ 143 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 144 PetscScalar *new_a; \ 145 \ 146 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 147 \ 148 /* malloc new storage space */ \ 149 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 150 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 151 new_j = (int*)(new_a + new_nz); \ 152 new_i = new_j + new_nz; \ 153 \ 154 /* copy over old data into new slots */ \ 155 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 156 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 157 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 158 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 159 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 160 len*sizeof(int));CHKERRQ(ierr); \ 161 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 163 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 164 /* free up old matrix storage */ \ 165 \ 166 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 167 if (!b->singlemalloc) { \ 168 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 169 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 170 } \ 171 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 172 b->singlemalloc = PETSC_TRUE; \ 173 \ 174 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 175 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 176 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 177 b->maxnz += CHUNKSIZE; \ 178 b->reallocs++; \ 179 } \ 180 N = nrow++ - 1; b->nz++; \ 181 /* shift up all the later entries in this row */ \ 182 for (ii=N; ii>=_i; ii--) { \ 183 rp[ii+1] = rp[ii]; \ 184 ap[ii+1] = ap[ii]; \ 185 } \ 186 rp[_i] = col1; \ 187 ap[_i] = value; \ 188 b_noinsert: ; \ 189 bilen[row] = nrow; \ 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatSetValues_MPIAIJ" 194 int MatSetValues_MPIAIJ(Mat 
mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 195 { 196 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 197 PetscScalar value; 198 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 199 int cstart = aij->cstart,cend = aij->cend,row,col; 200 PetscTruth roworiented = aij->roworiented; 201 202 /* Some Variables required in the macro */ 203 Mat A = aij->A; 204 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 205 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 206 PetscScalar *aa = a->a; 207 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 208 Mat B = aij->B; 209 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 210 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 211 PetscScalar *ba = b->a; 212 213 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 214 int nonew = a->nonew,shift=0; 215 PetscScalar *ap; 216 217 PetscFunctionBegin; 218 for (i=0; i<m; i++) { 219 if (im[i] < 0) continue; 220 #if defined(PETSC_USE_BOPT_g) 221 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 222 #endif 223 if (im[i] >= rstart && im[i] < rend) { 224 row = im[i] - rstart; 225 for (j=0; j<n; j++) { 226 if (in[j] >= cstart && in[j] < cend){ 227 col = in[j] - cstart; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 if (ignorezeroentries && value == 0.0) continue; 230 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } else if (in[j] < 0) continue; 233 #if defined(PETSC_USE_BOPT_g) 234 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 235 #endif 236 else { 237 if (mat->was_assembled) { 238 if (!aij->colmap) { 239 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 240 } 241 #if defined (PETSC_USE_CTABLE) 242 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 243 col--; 244 #else 245 col = aij->colmap[in[j]] - 1; 246 #endif 247 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 248 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 249 col = in[j]; 250 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 251 B = aij->B; 252 b = (Mat_SeqAIJ*)B->data; 253 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 254 ba = b->a; 255 } 256 } else col = in[j]; 257 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 258 if (ignorezeroentries && value == 0.0) continue; 259 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 260 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 261 } 262 } 263 } else { 264 if (!aij->donotstash) { 265 if (roworiented) { 266 if (ignorezeroentries && v[i*n] == 0.0) continue; 267 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 268 } else { 269 if (ignorezeroentries && v[i] == 0.0) continue; 270 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 271 } 272 } 273 } 274 } 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatGetValues_MPIAIJ" 280 int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 281 { 282 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 283 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 284 int cstart = aij->cstart,cend = aij->cend,row,col; 285 286 PetscFunctionBegin; 287 for (i=0; i<m; i++) { 288 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative 
row: %d",idxm[i]); 289 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 290 if (idxm[i] >= rstart && idxm[i] < rend) { 291 row = idxm[i] - rstart; 292 for (j=0; j<n; j++) { 293 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]); 294 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 295 if (idxn[j] >= cstart && idxn[j] < cend){ 296 col = idxn[j] - cstart; 297 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 298 } else { 299 if (!aij->colmap) { 300 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 301 } 302 #if defined (PETSC_USE_CTABLE) 303 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 304 col --; 305 #else 306 col = aij->colmap[idxn[j]] - 1; 307 #endif 308 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 309 else { 310 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 311 } 312 } 313 } 314 } else { 315 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 316 } 317 } 318 PetscFunctionReturn(0); 319 } 320 321 #undef __FUNCT__ 322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 324 { 325 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 326 int ierr,nstash,reallocs; 327 InsertMode addv; 328 329 PetscFunctionBegin; 330 if (aij->donotstash) { 331 PetscFunctionReturn(0); 332 } 333 334 /* make sure all processors are either in INSERTMODE or ADDMODE */ 335 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 336 if (addv == (ADD_VALUES|INSERT_VALUES)) { 337 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 338 } 339 mat->insertmode = addv; /* in case this processor had no cache */ 340 341 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 342 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 343 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 344 PetscFunctionReturn(0); 345 } 346 347 348 #undef __FUNCT__ 349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 351 { 352 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 353 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 354 int i,j,rstart,ncols,n,ierr,flg; 355 int *row,*col,other_disassembled; 356 PetscScalar *val; 357 InsertMode addv = mat->insertmode; 358 #if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_MUMPS) 359 PetscTruth flag; 360 #endif 361 362 PetscFunctionBegin; 363 if (!aij->donotstash) { 364 while (1) { 365 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 366 if (!flg) break; 367 368 for (i=0; i<n;) { 369 /* Now identify the consecutive vals belonging to the same row */ 370 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 371 if (j < n) ncols = j-i; 372 else ncols = n-i; 373 /* Now assemble all these values with a single function call */ 374 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 375 i = j; 376 } 377 } 378 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 379 } 380 381 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 382 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 383 384 /* determine if any processor has disassembled, if so we must 385 also disassemble ourselfs, in order that we may reassemble. 
*/ 386 /* 387 if nonzero structure of submatrix B cannot change then we know that 388 no processor disassembled thus we can skip this stuff 389 */ 390 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 391 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 392 if (mat->was_assembled && !other_disassembled) { 393 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 394 } 395 } 396 397 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 398 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 399 } 400 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 401 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 402 403 if (aij->rowvalues) { 404 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 405 aij->rowvalues = 0; 406 } 407 408 /* used by MatAXPY() */ 409 a->xtoy = 0; b->xtoy = 0; 410 a->XtoY = 0; b->XtoY = 0; 411 412 #if defined(PETSC_HAVE_SUPERLUDIST) 413 ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr); 414 if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); } 415 #endif 416 417 #if defined(PETSC_HAVE_MUMPS) 418 ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr); 419 if (flag) { ierr = MatUseMUMPS_MPIAIJ(mat);CHKERRQ(ierr); } 420 #endif 421 PetscFunctionReturn(0); 422 } 423 424 #undef __FUNCT__ 425 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 426 int MatZeroEntries_MPIAIJ(Mat A) 427 { 428 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 429 int ierr; 430 431 PetscFunctionBegin; 432 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 433 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 434 PetscFunctionReturn(0); 435 } 436 437 #undef __FUNCT__ 438 #define __FUNCT__ "MatZeroRows_MPIAIJ" 439 int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 440 { 441 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 442 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 443 int *nprocs,j,idx,nsends,row; 444 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 445 int *rvalues,tag = A->tag,count,base,slen,n,*source; 446 int *lens,imdex,*lrows,*values,rstart=l->rstart; 447 MPI_Comm comm = A->comm; 448 MPI_Request *send_waits,*recv_waits; 449 MPI_Status recv_status,*send_status; 450 IS istmp; 451 PetscTruth found; 452 453 PetscFunctionBegin; 454 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 455 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 456 457 /* first count number of contributors to each processor */ 458 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 459 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 460 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 461 for (i=0; i<N; i++) { 462 idx = rows[i]; 463 found = PETSC_FALSE; 464 for (j=0; j<size; j++) { 465 if (idx >= owners[j] && idx < owners[j+1]) { 466 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 467 } 468 } 469 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 470 } 471 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 472 473 /* inform other processors of number of messages and max length*/ 474 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 475 476 /* post receives: */ 477 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 478 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 479 for (i=0; i<nrecvs; i++) { 480 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 481 } 482 483 /* do sends: 484 1) starts[i] gives the starting index in svalues for 
stuff going to
       the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory and thus trigger many mallocs,
     slowing things down.
547 548 Contributed by: Mathew Knepley 549 */ 550 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 551 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 552 if (diag && (l->A->M == l->A->N)) { 553 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 554 } else if (diag) { 555 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 556 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 557 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 558 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 559 } 560 for (i = 0; i < slen; i++) { 561 row = lrows[i] + rstart; 562 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 563 } 564 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 565 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 566 } else { 567 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 568 } 569 ierr = ISDestroy(istmp);CHKERRQ(ierr); 570 ierr = PetscFree(lrows);CHKERRQ(ierr); 571 572 /* wait on sends */ 573 if (nsends) { 574 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 575 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 576 ierr = PetscFree(send_status);CHKERRQ(ierr); 577 } 578 ierr = PetscFree(send_waits);CHKERRQ(ierr); 579 ierr = PetscFree(svalues);CHKERRQ(ierr); 580 581 PetscFunctionReturn(0); 582 } 583 584 #undef __FUNCT__ 585 #define __FUNCT__ "MatMult_MPIAIJ" 586 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 587 { 588 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 589 int ierr,nt; 590 591 PetscFunctionBegin; 592 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 593 if (nt != A->n) { 594 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 595 } 596 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 597 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 598 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 599 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 600 PetscFunctionReturn(0); 601 } 602 603 #undef __FUNCT__ 604 #define __FUNCT__ "MatMultAdd_MPIAIJ" 605 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 606 { 607 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 608 int ierr; 609 610 PetscFunctionBegin; 611 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 612 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 613 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 614 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 615 PetscFunctionReturn(0); 616 } 617 618 #undef __FUNCT__ 619 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 620 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 621 { 622 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 623 int ierr; 624 625 PetscFunctionBegin; 626 /* do nondiagonal part */ 627 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 628 /* send it on its way */ 629 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 630 /* do local part */ 631 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 632 /* receive remote parts: note this assumes the values are not actually */ 633 /* inserted in yy until the next line, which is true for my implementation*/ 634 /* but is not perhaps always true. 
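
     For reference, MatMult_MPIAIJ, MatMultAdd_MPIAIJ and the transpose
     variants here all use the same splitting: on each process the owned
     rows are stored as [A | B], where A holds the locally owned columns and
     B the (column-compressed) off-process columns, so that

         y_local = A*x_local + B*x_ghost.

     The forward scatter that fills the ghost values (a->lvec) is started
     before the purely local product, so communication overlaps computation;
     annotated, the forward multiply is

         VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);   start ghost update
         (*a->A->ops->mult)(a->A,xx,yy);                                       y = A*x_local
         VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);     ghost values arrive
         (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);                            y += B*x_ghost

     The transpose kernels run the same scatter in SCATTER_REVERSE with
     ADD_VALUES, since each process contributes to entries of the result that
     it does not own.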
*/ 635 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 636 PetscFunctionReturn(0); 637 } 638 639 EXTERN_C_BEGIN 640 #undef __FUNCT__ 641 #define __FUNCT__ "MatIsSymmetric_MPIAIJ" 642 int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f) 643 { 644 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 645 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 646 IS Me,Notme; 647 int M,N,first,last,*notme,i, ierr; 648 649 PetscFunctionBegin; 650 651 /* Easy test: symmetric diagonal block */ 652 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 653 ierr = MatIsSymmetric(Adia,Bdia,f); CHKERRQ(ierr); 654 if (!*f) PetscFunctionReturn(0); 655 656 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 657 ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr); 658 ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr); 659 ierr = PetscMalloc((N-last+first)*sizeof(int),¬me); CHKERRQ(ierr); 660 for (i=0; i<first; i++) notme[i] = i; 661 for (i=last; i<M; i++) notme[i-last+first] = i; 662 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr); 663 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr); 664 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs); CHKERRQ(ierr); 665 Aoff = Aoffs[0]; 666 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs); CHKERRQ(ierr); 667 Boff = Boffs[0]; 668 ierr = MatIsSymmetric(Aoff,Boff,f); CHKERRQ(ierr); 669 ierr = MatDestroyMatrices(1,&Aoffs); CHKERRQ(ierr); 670 ierr = MatDestroyMatrices(1,&Boffs); CHKERRQ(ierr); 671 ierr = ISDestroy(Me); CHKERRQ(ierr); 672 ierr = ISDestroy(Notme); CHKERRQ(ierr); 673 674 PetscFunctionReturn(0); 675 } 676 EXTERN_C_END 677 678 #undef __FUNCT__ 679 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 680 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 681 { 682 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 683 int ierr; 684 685 PetscFunctionBegin; 686 /* do nondiagonal part */ 687 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 688 /* send it on its way */ 689 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 690 /* do local part */ 691 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 692 /* receive remote parts: note this assumes the values are not actually */ 693 /* inserted in yy until the next line, which is true for my implementation*/ 694 /* but is not perhaps always true. 
*/ 695 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 696 PetscFunctionReturn(0); 697 } 698 699 /* 700 This only works correctly for square matrices where the subblock A->A is the 701 diagonal block 702 */ 703 #undef __FUNCT__ 704 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 705 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 706 { 707 int ierr; 708 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 709 710 PetscFunctionBegin; 711 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 712 if (a->rstart != a->cstart || a->rend != a->cend) { 713 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 714 } 715 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 716 PetscFunctionReturn(0); 717 } 718 719 #undef __FUNCT__ 720 #define __FUNCT__ "MatScale_MPIAIJ" 721 int MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 722 { 723 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 724 int ierr; 725 726 PetscFunctionBegin; 727 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 728 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 729 PetscFunctionReturn(0); 730 } 731 732 #undef __FUNCT__ 733 #define __FUNCT__ "MatDestroy_MPIAIJ" 734 int MatDestroy_MPIAIJ(Mat mat) 735 { 736 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 737 int ierr; 738 739 PetscFunctionBegin; 740 #if defined(PETSC_USE_LOG) 741 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 742 #endif 743 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 744 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 745 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 746 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 747 #if defined (PETSC_USE_CTABLE) 748 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 749 #else 750 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 751 #endif 752 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 753 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 754 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 755 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 756 ierr = PetscFree(aij);CHKERRQ(ierr); 757 PetscFunctionReturn(0); 758 } 759 760 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer); 761 extern int MatFactorInfo_MUMPS(Mat,PetscViewer); 762 763 #undef __FUNCT__ 764 #define __FUNCT__ "MatView_MPIAIJ_Binary" 765 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 766 { 767 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 768 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 769 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 770 int nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag; 771 int nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 772 PetscScalar *column_values; 773 774 PetscFunctionBegin; 775 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 776 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 777 nz = A->nz + B->nz; 778 if (rank == 0) { 779 header[0] = MAT_FILE_COOKIE; 780 header[1] = mat->M; 781 header[2] = mat->N; 782 ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 783 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 784 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr); 785 /* get largest number of rows any processor has */ 786 rlen = mat->m; 787 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 788 for (i=1; i<size; i++) { 789 rlen = PetscMax(rlen,range[i+1] - range[i]); 790 } 791 } else { 792 ierr = 
MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 793 rlen = mat->m; 794 } 795 796 /* load up the local row counts */ 797 ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr); 798 for (i=0; i<mat->m; i++) { 799 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 800 } 801 802 /* store the row lengths to the file */ 803 if (rank == 0) { 804 MPI_Status status; 805 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr); 806 for (i=1; i<size; i++) { 807 rlen = range[i+1] - range[i]; 808 ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 809 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr); 810 } 811 } else { 812 ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 813 } 814 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 815 816 /* load up the local column indices */ 817 nzmax = nz; /* )th processor needs space a largest processor needs */ 818 ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 819 ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr); 820 cnt = 0; 821 for (i=0; i<mat->m; i++) { 822 for (j=B->i[i]; j<B->i[i+1]; j++) { 823 if ( (col = garray[B->j[j]]) > cstart) break; 824 column_indices[cnt++] = col; 825 } 826 for (k=A->i[i]; k<A->i[i+1]; k++) { 827 column_indices[cnt++] = A->j[k] + cstart; 828 } 829 for (; j<B->i[i+1]; j++) { 830 column_indices[cnt++] = garray[B->j[j]]; 831 } 832 } 833 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 834 835 /* store the column indices to the file */ 836 if (rank == 0) { 837 MPI_Status status; 838 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr); 839 for (i=1; i<size; i++) { 840 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 841 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 842 ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 843 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr); 844 } 845 } else { 846 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 847 ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 848 } 849 ierr = PetscFree(column_indices);CHKERRQ(ierr); 850 851 /* load up the local column values */ 852 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 853 cnt = 0; 854 for (i=0; i<mat->m; i++) { 855 for (j=B->i[i]; j<B->i[i+1]; j++) { 856 if ( garray[B->j[j]] > cstart) break; 857 column_values[cnt++] = B->a[j]; 858 } 859 for (k=A->i[i]; k<A->i[i+1]; k++) { 860 column_values[cnt++] = A->a[k]; 861 } 862 for (; j<B->i[i+1]; j++) { 863 column_values[cnt++] = B->a[j]; 864 } 865 } 866 if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 867 868 /* store the column values to the file */ 869 if (rank == 0) { 870 MPI_Status status; 871 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr); 872 for (i=1; i<size; i++) { 873 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 874 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 875 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 876 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr); 877 } 878 } else { 879 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 880 ierr = 
MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 881 } 882 ierr = PetscFree(column_values);CHKERRQ(ierr); 883 PetscFunctionReturn(0); 884 } 885 886 #undef __FUNCT__ 887 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 888 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 889 { 890 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 891 int ierr,rank = aij->rank,size = aij->size; 892 PetscTruth isdraw,isascii,flg,isbinary; 893 PetscViewer sviewer; 894 PetscViewerFormat format; 895 896 PetscFunctionBegin; 897 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 898 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 899 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 900 if (isascii) { 901 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 902 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 903 MatInfo info; 904 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 905 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 906 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 907 if (flg) { 908 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 909 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 910 } else { 911 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 912 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 913 } 914 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 915 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 916 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 917 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 918 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 919 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 920 PetscFunctionReturn(0); 921 } else if (format == PETSC_VIEWER_ASCII_INFO) { 922 PetscFunctionReturn(0); 923 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 924 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE) 925 ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr); 926 #endif 927 #if defined(PETSC_HAVE_MUMPS) && !defined(PETSC_USE_SINGLE) 928 ierr = MatFactorInfo_MUMPS(mat,viewer);CHKERRQ(ierr); 929 #endif 930 PetscFunctionReturn(0); 931 } 932 } else if (isbinary) { 933 if (size == 1) { 934 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 935 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 936 } else { 937 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 938 } 939 PetscFunctionReturn(0); 940 } else if (isdraw) { 941 PetscDraw draw; 942 PetscTruth isnull; 943 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 944 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 945 } 946 947 if (size == 1) { 948 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 949 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 950 } else { 951 /* assemble the entire matrix onto first processor. 
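
     This gather is simple but not memory scalable: the first process
     temporarily holds a copy of the entire matrix, so it is only intended
     for the ASCII and draw viewers on small problems.  The binary path in
     MatView_MPIAIJ_Binary() above never forms such a copy; instead each
     process sends its rows to process 0, which streams the standard PETSc
     AIJ binary layout to the file:

         int    MAT_FILE_COOKIE, M, N, total_nz      (4-int header)
         int    rowlen[M]                            (nonzeros per global row)
         int    colidx[total_nz]                     (global column indices, row by row)
         scalar vals[total_nz]                       (numerical values, row by row)

     where, within each row, the A and B blocks are merged back into
     ascending global column order (B entries below cstart, then the A block,
     then the remaining B entries).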
*/ 952 Mat A; 953 Mat_SeqAIJ *Aloc; 954 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 955 PetscScalar *a; 956 957 if (!rank) { 958 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 959 } else { 960 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 961 } 962 PetscLogObjectParent(mat,A); 963 964 /* copy over the A part */ 965 Aloc = (Mat_SeqAIJ*)aij->A->data; 966 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 967 row = aij->rstart; 968 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 969 for (i=0; i<m; i++) { 970 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 971 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 972 } 973 aj = Aloc->j; 974 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 975 976 /* copy over the B part */ 977 Aloc = (Mat_SeqAIJ*)aij->B->data; 978 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 979 row = aij->rstart; 980 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 981 ct = cols; 982 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 983 for (i=0; i<m; i++) { 984 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 985 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 986 } 987 ierr = PetscFree(ct);CHKERRQ(ierr); 988 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 989 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 990 /* 991 Everyone has to call to draw the matrix since the graphics waits are 992 synchronized across all processors that share the PetscDraw object 993 */ 994 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 995 if (!rank) { 996 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 997 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 998 } 999 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1000 ierr = MatDestroy(A);CHKERRQ(ierr); 1001 } 1002 PetscFunctionReturn(0); 1003 } 1004 1005 #undef __FUNCT__ 1006 #define __FUNCT__ "MatView_MPIAIJ" 1007 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1008 { 1009 int ierr; 1010 PetscTruth isascii,isdraw,issocket,isbinary; 1011 1012 PetscFunctionBegin; 1013 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 1014 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1015 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1016 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1017 if (isascii || isdraw || isbinary || issocket) { 1018 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1019 } else { 1020 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1021 } 1022 PetscFunctionReturn(0); 1023 } 1024 1025 1026 1027 #undef __FUNCT__ 1028 #define __FUNCT__ "MatRelax_MPIAIJ" 1029 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1030 { 1031 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1032 int ierr; 1033 Vec bb1; 1034 PetscScalar mone=-1.0; 1035 1036 PetscFunctionBegin; 1037 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1038 1039 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1040 1041 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1042 if (flag & SOR_ZERO_INITIAL_GUESS) { 
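  /*
     A note on the scheme used throughout this routine (a sketch of the
     reasoning, not additional code): the owned rows are split as [A | B]
     with x = (x_local, x_ghost), so each outer iteration freezes the ghost
     values and relaxes only the local block,

         x_ghost <- scatter of the current global x into mat->lvec
         bb1     <- bb - B*x_ghost      (lvec is scaled by -1, then B->multadd)
         x_local <- lits local SOR sweeps applied to  A*x_local = bb1

     i.e. block Jacobi across processes with SOR inside each block, which is
     why only the SOR_LOCAL_* flags are accepted here.
  */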
1043 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1044 its--; 1045 } 1046 1047 while (its--) { 1048 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1049 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1050 1051 /* update rhs: bb1 = bb - B*x */ 1052 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1053 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1054 1055 /* local sweep */ 1056 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1057 CHKERRQ(ierr); 1058 } 1059 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1060 if (flag & SOR_ZERO_INITIAL_GUESS) { 1061 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1062 its--; 1063 } 1064 while (its--) { 1065 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1066 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1067 1068 /* update rhs: bb1 = bb - B*x */ 1069 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1070 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1071 1072 /* local sweep */ 1073 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1074 CHKERRQ(ierr); 1075 } 1076 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1077 if (flag & SOR_ZERO_INITIAL_GUESS) { 1078 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1079 its--; 1080 } 1081 while (its--) { 1082 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1083 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1084 1085 /* update rhs: bb1 = bb - B*x */ 1086 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1087 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1088 1089 /* local sweep */ 1090 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1091 CHKERRQ(ierr); 1092 } 1093 } else { 1094 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1095 } 1096 1097 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1098 PetscFunctionReturn(0); 1099 } 1100 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1103 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1104 { 1105 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1106 Mat A = mat->A,B = mat->B; 1107 int ierr; 1108 PetscReal isend[5],irecv[5]; 1109 1110 PetscFunctionBegin; 1111 info->block_size = 1.0; 1112 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1113 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1114 isend[3] = info->memory; isend[4] = info->mallocs; 1115 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1116 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1117 isend[3] += info->memory; isend[4] += info->mallocs; 1118 if (flag == MAT_LOCAL) { 1119 info->nz_used = isend[0]; 1120 info->nz_allocated = isend[1]; 1121 info->nz_unneeded = isend[2]; 1122 info->memory = isend[3]; 1123 info->mallocs = isend[4]; 1124 } else if (flag == MAT_GLOBAL_MAX) { 1125 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1126 info->nz_used = irecv[0]; 1127 info->nz_allocated = irecv[1]; 1128 info->nz_unneeded = irecv[2]; 1129 info->memory = irecv[3]; 1130 info->mallocs = irecv[4]; 1131 } else if 
(flag == MAT_GLOBAL_SUM) { 1132 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1133 info->nz_used = irecv[0]; 1134 info->nz_allocated = irecv[1]; 1135 info->nz_unneeded = irecv[2]; 1136 info->memory = irecv[3]; 1137 info->mallocs = irecv[4]; 1138 } 1139 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1140 info->fill_ratio_needed = 0; 1141 info->factor_mallocs = 0; 1142 info->rows_global = (double)matin->M; 1143 info->columns_global = (double)matin->N; 1144 info->rows_local = (double)matin->m; 1145 info->columns_local = (double)matin->N; 1146 1147 PetscFunctionReturn(0); 1148 } 1149 1150 #undef __FUNCT__ 1151 #define __FUNCT__ "MatSetOption_MPIAIJ" 1152 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1153 { 1154 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1155 int ierr; 1156 1157 PetscFunctionBegin; 1158 switch (op) { 1159 case MAT_NO_NEW_NONZERO_LOCATIONS: 1160 case MAT_YES_NEW_NONZERO_LOCATIONS: 1161 case MAT_COLUMNS_UNSORTED: 1162 case MAT_COLUMNS_SORTED: 1163 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1164 case MAT_KEEP_ZEROED_ROWS: 1165 case MAT_NEW_NONZERO_LOCATION_ERR: 1166 case MAT_USE_INODES: 1167 case MAT_DO_NOT_USE_INODES: 1168 case MAT_IGNORE_ZERO_ENTRIES: 1169 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1170 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1171 break; 1172 case MAT_ROW_ORIENTED: 1173 a->roworiented = PETSC_TRUE; 1174 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1175 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1176 break; 1177 case MAT_ROWS_SORTED: 1178 case MAT_ROWS_UNSORTED: 1179 case MAT_YES_NEW_DIAGONALS: 1180 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1181 break; 1182 case MAT_COLUMN_ORIENTED: 1183 a->roworiented = PETSC_FALSE; 1184 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1185 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1186 break; 1187 case MAT_IGNORE_OFF_PROC_ENTRIES: 1188 a->donotstash = PETSC_TRUE; 1189 break; 1190 case MAT_NO_NEW_DIAGONALS: 1191 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1192 default: 1193 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1194 } 1195 PetscFunctionReturn(0); 1196 } 1197 1198 #undef __FUNCT__ 1199 #define __FUNCT__ "MatGetRow_MPIAIJ" 1200 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1201 { 1202 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1203 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1204 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1205 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1206 int *cmap,*idx_p; 1207 1208 PetscFunctionBegin; 1209 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1210 mat->getrowactive = PETSC_TRUE; 1211 1212 if (!mat->rowvalues && (idx || v)) { 1213 /* 1214 allocate enough space to hold information from the longest row. 
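
     (The scratch length computed below is the maximum over the local rows of
     the combined A-part and B-part row lengths.)  MatGetRow on an MPIAIJ
     matrix then returns one locally owned row in global numbering, merged in
     ascending column order from the two blocks:

         [ B entries whose global column garray[col] is < cstart ]
         [ A entries, reported as cstart + local column          ]
         [ remaining B entries, global column >= cend            ]

     A hedged usage sketch (grow must be a locally owned global row number,
     and the row must be restored before the next MatGetRow call):

         int         nz,*cols;
         PetscScalar *vals;
         ierr = MatGetRow(mat,grow,&nz,&cols,&vals);CHKERRQ(ierr);
         ierr = MatRestoreRow(mat,grow,&nz,&cols,&vals);CHKERRQ(ierr);

     cols[0..nz-1] are ascending global column indices and vals holds the
     matching values.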
1215 */ 1216 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1217 int max = 1,tmp; 1218 for (i=0; i<matin->m; i++) { 1219 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1220 if (max < tmp) { max = tmp; } 1221 } 1222 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1223 mat->rowindices = (int*)(mat->rowvalues + max); 1224 } 1225 1226 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1227 lrow = row - rstart; 1228 1229 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1230 if (!v) {pvA = 0; pvB = 0;} 1231 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1232 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1233 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1234 nztot = nzA + nzB; 1235 1236 cmap = mat->garray; 1237 if (v || idx) { 1238 if (nztot) { 1239 /* Sort by increasing column numbers, assuming A and B already sorted */ 1240 int imark = -1; 1241 if (v) { 1242 *v = v_p = mat->rowvalues; 1243 for (i=0; i<nzB; i++) { 1244 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1245 else break; 1246 } 1247 imark = i; 1248 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1249 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1250 } 1251 if (idx) { 1252 *idx = idx_p = mat->rowindices; 1253 if (imark > -1) { 1254 for (i=0; i<imark; i++) { 1255 idx_p[i] = cmap[cworkB[i]]; 1256 } 1257 } else { 1258 for (i=0; i<nzB; i++) { 1259 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1260 else break; 1261 } 1262 imark = i; 1263 } 1264 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1265 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1266 } 1267 } else { 1268 if (idx) *idx = 0; 1269 if (v) *v = 0; 1270 } 1271 } 1272 *nz = nztot; 1273 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1274 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1275 PetscFunctionReturn(0); 1276 } 1277 1278 #undef __FUNCT__ 1279 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1280 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1281 { 1282 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1283 1284 PetscFunctionBegin; 1285 if (aij->getrowactive == PETSC_FALSE) { 1286 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1287 } 1288 aij->getrowactive = PETSC_FALSE; 1289 PetscFunctionReturn(0); 1290 } 1291 1292 #undef __FUNCT__ 1293 #define __FUNCT__ "MatNorm_MPIAIJ" 1294 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1295 { 1296 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1297 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1298 int ierr,i,j,cstart = aij->cstart; 1299 PetscReal sum = 0.0; 1300 PetscScalar *v; 1301 1302 PetscFunctionBegin; 1303 if (aij->size == 1) { 1304 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1305 } else { 1306 if (type == NORM_FROBENIUS) { 1307 v = amat->a; 1308 for (i=0; i<amat->nz; i++) { 1309 #if defined(PETSC_USE_COMPLEX) 1310 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1311 #else 1312 sum += (*v)*(*v); v++; 1313 #endif 1314 } 1315 v = bmat->a; 1316 for (i=0; i<bmat->nz; i++) { 1317 #if defined(PETSC_USE_COMPLEX) 1318 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1319 #else 1320 sum += (*v)*(*v); v++; 1321 #endif 1322 } 1323 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1324 *norm = sqrt(*norm); 1325 } else if (type == NORM_1) { /* max column norm */ 1326 PetscReal *tmp,*tmp2; 1327 int 
*jj,*garray = aij->garray; 1328 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1329 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1330 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1331 *norm = 0.0; 1332 v = amat->a; jj = amat->j; 1333 for (j=0; j<amat->nz; j++) { 1334 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1335 } 1336 v = bmat->a; jj = bmat->j; 1337 for (j=0; j<bmat->nz; j++) { 1338 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1339 } 1340 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1341 for (j=0; j<mat->N; j++) { 1342 if (tmp2[j] > *norm) *norm = tmp2[j]; 1343 } 1344 ierr = PetscFree(tmp);CHKERRQ(ierr); 1345 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1346 } else if (type == NORM_INFINITY) { /* max row norm */ 1347 PetscReal ntemp = 0.0; 1348 for (j=0; j<aij->A->m; j++) { 1349 v = amat->a + amat->i[j]; 1350 sum = 0.0; 1351 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1352 sum += PetscAbsScalar(*v); v++; 1353 } 1354 v = bmat->a + bmat->i[j]; 1355 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1356 sum += PetscAbsScalar(*v); v++; 1357 } 1358 if (sum > ntemp) ntemp = sum; 1359 } 1360 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1361 } else { 1362 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1363 } 1364 } 1365 PetscFunctionReturn(0); 1366 } 1367 1368 #undef __FUNCT__ 1369 #define __FUNCT__ "MatTranspose_MPIAIJ" 1370 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1371 { 1372 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1373 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1374 int ierr; 1375 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1376 Mat B; 1377 PetscScalar *array; 1378 1379 PetscFunctionBegin; 1380 if (!matout && M != N) { 1381 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1382 } 1383 1384 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1385 1386 /* copy over the A part */ 1387 Aloc = (Mat_SeqAIJ*)a->A->data; 1388 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1389 row = a->rstart; 1390 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1391 for (i=0; i<m; i++) { 1392 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1393 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1394 } 1395 aj = Aloc->j; 1396 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1397 1398 /* copy over the B part */ 1399 Aloc = (Mat_SeqAIJ*)a->B->data; 1400 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1401 row = a->rstart; 1402 ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr); 1403 ct = cols; 1404 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1405 for (i=0; i<m; i++) { 1406 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1407 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1408 } 1409 ierr = PetscFree(ct);CHKERRQ(ierr); 1410 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1411 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1412 if (matout) { 1413 *matout = B; 1414 } else { 1415 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1416 } 1417 PetscFunctionReturn(0); 1418 } 1419 1420 #undef __FUNCT__ 1421 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1422 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1423 { 1424 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1425 Mat a = aij->A,b = aij->B; 1426 int ierr,s1,s2,s3; 1427 1428 PetscFunctionBegin; 1429 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1430 if 
(rr) { 1431 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1432 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1433 /* Overlap communication with computation. */ 1434 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1435 } 1436 if (ll) { 1437 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1438 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1439 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1440 } 1441 /* scale the diagonal block */ 1442 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1443 1444 if (rr) { 1445 /* Do a scatter end and then right scale the off-diagonal block */ 1446 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1447 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1448 } 1449 1450 PetscFunctionReturn(0); 1451 } 1452 1453 1454 #undef __FUNCT__ 1455 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1456 int MatPrintHelp_MPIAIJ(Mat A) 1457 { 1458 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1459 int ierr; 1460 1461 PetscFunctionBegin; 1462 if (!a->rank) { 1463 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1464 } 1465 PetscFunctionReturn(0); 1466 } 1467 1468 #undef __FUNCT__ 1469 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1470 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1471 { 1472 PetscFunctionBegin; 1473 *bs = 1; 1474 PetscFunctionReturn(0); 1475 } 1476 #undef __FUNCT__ 1477 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1478 int MatSetUnfactored_MPIAIJ(Mat A) 1479 { 1480 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1481 int ierr; 1482 1483 PetscFunctionBegin; 1484 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1485 PetscFunctionReturn(0); 1486 } 1487 1488 #undef __FUNCT__ 1489 #define __FUNCT__ "MatEqual_MPIAIJ" 1490 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1491 { 1492 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1493 Mat a,b,c,d; 1494 PetscTruth flg; 1495 int ierr; 1496 1497 PetscFunctionBegin; 1498 a = matA->A; b = matA->B; 1499 c = matB->A; d = matB->B; 1500 1501 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1502 if (flg == PETSC_TRUE) { 1503 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1504 } 1505 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1506 PetscFunctionReturn(0); 1507 } 1508 1509 #undef __FUNCT__ 1510 #define __FUNCT__ "MatCopy_MPIAIJ" 1511 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1512 { 1513 int ierr; 1514 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1515 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1516 1517 PetscFunctionBegin; 1518 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1519 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1520 /* because of the column compression in the off-processor part of the matrix a->B, 1521 the number of columns in a->B and b->B may be different, hence we cannot call 1522 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1523 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1524 then copying the submatrices */ 1525 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1526 } else { 1527 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1528 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1529 } 1530 PetscFunctionReturn(0); 1531 } 1532 1533 #undef __FUNCT__ 1534 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1535 int MatSetUpPreallocation_MPIAIJ(Mat A) 1536 { 1537 int ierr; 1538 1539 PetscFunctionBegin; 1540 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1541 PetscFunctionReturn(0); 1542 } 1543 1544 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1545 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1546 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1547 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,const IS[],const IS[],MatReuse,Mat *[]); 1548 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1549 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1550 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*); 1551 #endif 1552 1553 #include "petscblaslapack.h" 1554 extern int MatAXPY_SeqAIJ(const PetscScalar[],Mat,Mat,MatStructure); 1555 #undef __FUNCT__ 1556 #define __FUNCT__ "MatAXPY_MPIAIJ" 1557 int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1558 { 1559 int ierr,one=1,i; 1560 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1561 Mat_SeqAIJ *x,*y; 1562 1563 PetscFunctionBegin; 1564 if (str == SAME_NONZERO_PATTERN) { 1565 x = (Mat_SeqAIJ *)xx->A->data; 1566 y = (Mat_SeqAIJ *)yy->A->data; 1567 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1568 x = (Mat_SeqAIJ *)xx->B->data; 1569 y = (Mat_SeqAIJ *)yy->B->data; 1570 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1571 } else if (str == SUBSET_NONZERO_PATTERN) { 1572 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1573 1574 x = (Mat_SeqAIJ *)xx->B->data; 1575 y = (Mat_SeqAIJ *)yy->B->data; 1576 if (y->xtoy && y->XtoY != xx->B) { 1577 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1578 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1579 } 1580 if (!y->xtoy) { /* get xtoy */ 1581 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1582 y->XtoY = xx->B; 1583 } 1584 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1585 } else { 1586 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1587 } 1588 PetscFunctionReturn(0); 1589 } 1590 1591 /* -------------------------------------------------------------------*/ 1592 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1593 MatGetRow_MPIAIJ, 1594 MatRestoreRow_MPIAIJ, 1595 MatMult_MPIAIJ, 1596 MatMultAdd_MPIAIJ, 1597 MatMultTranspose_MPIAIJ, 1598 MatMultTransposeAdd_MPIAIJ, 1599 0, 1600 0, 1601 0, 1602 0, 1603 0, 1604 0, 1605 MatRelax_MPIAIJ, 1606 MatTranspose_MPIAIJ, 1607 MatGetInfo_MPIAIJ, 1608 MatEqual_MPIAIJ, 1609 MatGetDiagonal_MPIAIJ, 1610 MatDiagonalScale_MPIAIJ, 1611 MatNorm_MPIAIJ, 1612 MatAssemblyBegin_MPIAIJ, 1613 MatAssemblyEnd_MPIAIJ, 1614 0, 1615 MatSetOption_MPIAIJ, 1616 MatZeroEntries_MPIAIJ, 1617 MatZeroRows_MPIAIJ, 1618 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1619 MatLUFactorSymbolic_MPIAIJ_TFS, 1620 #else 1621 0, 1622 #endif 1623 0, 1624 0, 1625 0, 1626 MatSetUpPreallocation_MPIAIJ, 1627 0, 1628 0, 1629 0, 1630 0, 1631 MatDuplicate_MPIAIJ, 1632 0, 1633 0, 1634 0, 1635 0, 1636 MatAXPY_MPIAIJ, 
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
       MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
       MatZeroRows_MPIAIJ,
#if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
       MatLUFactorSymbolic_MPIAIJ_TFS,
#else
       0,
#endif
       0,
       0,
       0,
       MatSetUpPreallocation_MPIAIJ,
       0,
       0,
       0,
       0,
       MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
       MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
       MatGetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
       MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       0,
       0,
       MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       MatGetPetscMaps_Petsc,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatSetColoring_MPIAIJ,
       MatSetValuesAdic_MPIAIJ,
       MatSetValuesAdifor_MPIAIJ
};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
int MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
int MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#include "petscpc.h"
EXTERN_C_BEGIN
EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
EXTERN int MatDiagonalScaleLocal_MPIAIJ(Mat,Vec);
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
{
  Mat_MPIAIJ *b;
  int        ierr,i;

  PetscFunctionBegin;
  B->preallocated = PETSC_TRUE;
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<B->m; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->m; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->A);
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->B);

  PetscFunctionReturn(0);
}
EXTERN_C_END
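/*
   Editorial sketch, not part of the original source: the MatStoreValues()/
   MatRetrieveValues() pair (composed onto MPIAIJ matrices in MatCreate_MPIAIJ()
   below) is intended for repeatedly reusing a matrix whose nonzero pattern stays
   fixed while its numerical values change.  A is a hypothetical, already
   assembled MPIAIJ matrix; the option name MAT_NO_NEW_NONZERO_LOCATIONS is the
   one this pattern is usually paired with and is assumed here.

     ierr = MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
     ierr = MatStoreValues(A);CHKERRQ(ierr);        save the current numerical values
     ...  modify the values of A, e.g. with MatSetValues()/MatShift()  ...
     ierr = MatRetrieveValues(A);CHKERRQ(ierr);     restore the saved values
*/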
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
int MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  int        ierr,i,size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor    = 0;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);

  /* the information in the maps duplicates the information computed below, eventually 
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C",
                                           "MatIsSymmetric_MPIAIJ",
                                           MatIsSymmetric_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJ",
                                           MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIAIJ",
                                           MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  int        ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    int len;
    len  = oldmat->B->n;
    ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,len*sizeof(int));
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->B);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  PetscScalar  *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          i,nz,ierr,j,rstart,rend,fd;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
#if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_MUMPS)
  PetscTruth   flag;
#endif

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);

  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the parts owned by other processors and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#if defined(PETSC_HAVE_SUPERLUDIST)
  ierr = PetscOptionsHasName(A->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr); }
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscOptionsHasName(A->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseMUMPS_MPIAIJ(A);CHKERRQ(ierr); }
#endif
  *newmat = A;
  PetscFunctionReturn(0);
}
EXTERN_C_END
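/*
   Editorial sketch, not part of the original source: how MatLoad_MPIAIJ() above is
   normally reached from user code, by opening a PETSc binary viewer and calling
   MatLoad() with the MPIAIJ type.  The file name "matrix.dat" is hypothetical, and
   PETSC_BINARY_RDONLY is assumed to be the read-only mode name in this PETSc
   generation.

     Mat         A;
     PetscViewer viewer;
     int         ierr;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_BINARY_RDONLY,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/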
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ copy on each
  process, and then the final result obtained by concatenating the local matrices.
  Writing it directly would look much like MatGetSubMatrices_MPIAIJ().
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int         ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  int         *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat         *local,M,Mreuse;
  PetscScalar *vwork,*aa;
  MPI_Comm    comm = mat->comm;
  Mat_SeqAIJ  *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /* 
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal 
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
    }

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
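/*
   Editorial sketch, not part of the original source: typical use of the parallel
   MatGetSubMatrix() path implemented above.  The first call builds the submatrix
   (and caches the local SeqAIJ piece under the "SubMatrix" composed object); later
   calls with MAT_REUSE_MATRIX refill the same matrix.  The index sets isrow and
   iscol and the parent matrix mat are hypothetical; PETSC_DECIDE lets the routine
   pick the local column size.

     Mat sub;

     ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ...  change the values (but not the nonzero pattern) of mat  ...
     ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
     ierr = MatDestroy(sub);CHKERRQ(ierr);
*/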

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix 
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the 
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure. 
           The size of this array is equal to the number of local rows, i.e. 'm'. 
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero 
           structure. The size of this array is equal to the number 
           of local rows, i.e. 'm'. 

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to 
   process 0, the next m1 rows belong to process 1, the next m2 rows belong 
   to process 2, etc., where m0, m1, m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined 
   as the submatrix which is obtained by extracting the part corresponding to 
   the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 
   first row that belongs to the processor and r2 is the last row belonging 
   to this processor. This is a square mxm matrix. The remaining portion of 
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is 
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'n' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'M','N' parameters are 8, 8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0, proc1, proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0, proc1, proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz, o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store 
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz, o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e. 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
{
  int ierr,(*f)(Mat,int,const int[],int,const int[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
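/*
   Editorial sketch, not part of the original source: creating an MPIAIJ matrix with
   explicit preallocation through the routine above.  The local size and the per-row
   counts (d_nnz/o_nnz) are hypothetical; in practice they would come from the mesh
   or stencil.  This mirrors the construction mechanism mentioned in the
   MatCreateMPIAIJ() notes below.

     Mat A;
     int m = 3;                          local rows on this process
     int d_nnz[3] = {2,2,2};             nonzeros per row in the diagonal block
     int o_nnz[3] = {2,2,2};             nonzeros per row in the off-diagonal block

     ierr = MatCreate(PETSC_COMM_WORLD,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
     ...  MatSetValues() / MatAssemblyBegin() / MatAssemblyEnd()  ...
*/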

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the 
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the 
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the 
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure. 
           The size of this array is equal to the number of local rows, i.e. 'm'. 
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero 
           structure. The size of this array is equal to the number 
           of local rows, i.e. 'm'. 

   Output Parameter:
.  A - the matrix 

   Notes:
   The m, n, M, N parameters specify the size of the matrix and its partitioning across
   processors, while the d_nz, d_nnz, o_nz, o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 
   processor then it must be used on all processors that share the object for 
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to 
   process 0, the next m1 rows belong to process 1, the next m2 rows belong 
   to process 2, etc., where m0, m1, m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined 
   as the submatrix which is obtained by extracting the part corresponding to 
   the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 
   first row that belongs to the processor and r2 is the last row belonging 
   to this processor. This is a square mxm matrix. The remaining portion of 
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this 
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is 
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'n' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'M','N' parameters are 8, 8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0, proc1, proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0, proc1, proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz, o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store 
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz, o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e. 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}
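/*
   Editorial sketch, not part of the original source: MatMPIAIJGetSeqAIJ() above
   hands back the two local SeqAIJ blocks and the garray mapping that translates
   the off-diagonal block's compressed local column numbers into global column
   numbers.  A is a hypothetical assembled MPIAIJ matrix; the returned objects are
   references into A and must not be destroyed by the caller.

     Mat Ad,Ao;
     int *garray;

     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
     ...  local column j of Ao corresponds to global column garray[j]  ...
*/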

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  int        ierr,i;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_LOCAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    int             *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      larray[i] = i + a->cstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@C
      MatMerge - Creates a single large PETSc matrix by concatenating the sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
-    inmat - the input sequential matrix (one per process)

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix on EACH of the separate processes
          MUST be the same.

@*/
int MatMerge(MPI_Comm comm,Mat inmat,Mat *outmat)
{
  int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
  PetscScalar *values;
  PetscMap    columnmap,rowmap;

  PetscFunctionBegin;

  ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);

  /* count nonzeros in each row, for the diagonal and off-diagonal portions of the matrix */
  ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
  ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
  ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
  ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
  ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);

  ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
  ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
  ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
  ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
  ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
    ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    I    = i + rstart;
    ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatDestroy(inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
int MatFileSplit(Mat A,char *outfile)
{
  int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
  PetscViewer out;
  char        *name;
  Mat         B;
  PetscScalar *values;

  PetscFunctionBegin;

  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
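/*
   Editorial sketch, not part of the original source: typical use of the two
   utilities above.  MatMerge() takes the sequential matrix each process built
   locally and concatenates them into one parallel MPIAIJ matrix (note that it
   destroys the input matrix); MatFileSplit() writes each process's rows of a
   parallel matrix to its own binary file.  The names localmat and "split" are
   hypothetical.

     Mat localmat,global;

     ...  each process assembles its own SeqAIJ matrix localmat  ...
     ierr = MatMerge(PETSC_COMM_WORLD,localmat,&global);CHKERRQ(ierr);   localmat is destroyed here
     ierr = MatFileSplit(global,"split");CHKERRQ(ierr);                  writes split.0, split.1, ...
     ierr = MatDestroy(global);CHKERRQ(ierr);
*/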