/*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
EXTERN int DisAssemble_MPIAIJ(Mat);
EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatPrintHelp_SeqAIJ(Mat);

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
   a slightly higher hash table cost; without it it is not scalable (each
   processor has an order-N integer array) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if \
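/*
   Illustrative sketch (not part of the original source) of how the colmap built by
   CreateColmap_MPIAIJ_Private() above is meant to be used: it maps a global column
   number to the local column index in the off-diagonal block B, yielding -1 when the
   column is not present on this process.  Entries are stored shifted up by one so that
   a zero table/array entry means "absent".  The name GlobalToLocalColumn_Sketch and
   its exact signature are hypothetical; the two branches mirror the code above.

     static int GlobalToLocalColumn_Sketch(Mat mat,int gcol,int *lcol)
     {
       Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
       int        ierr;

       PetscFunctionBegin;
     #if defined (PETSC_USE_CTABLE)
       ierr  = PetscTableFind(aij->colmap,gcol+1,lcol);CHKERRQ(ierr);
       *lcol = *lcol - 1;
     #else
       *lcol = aij->colmap[gcol] - 1;
     #endif
       PetscFunctionReturn(0);
     }
*/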
(!a->singlemalloc) { \ 94 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 95 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 96 } \ 97 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 98 a->singlemalloc = PETSC_TRUE; \ 99 \ 100 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 101 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 102 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 103 a->maxnz += CHUNKSIZE; \ 104 a->reallocs++; \ 105 } \ 106 N = nrow++ - 1; a->nz++; \ 107 /* shift up all the later entries in this row */ \ 108 for (ii=N; ii>=_i; ii--) { \ 109 rp[ii+1] = rp[ii]; \ 110 ap[ii+1] = ap[ii]; \ 111 } \ 112 rp[_i] = col1; \ 113 ap[_i] = value; \ 114 a_noinsert: ; \ 115 ailen[row] = nrow; \ 116 } 117 118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 119 { \ 120 \ 121 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 122 rmax = bimax[row]; nrow = bilen[row]; \ 123 col1 = col - shift; \ 124 \ 125 low = 0; high = nrow; \ 126 while (high-low > 5) { \ 127 t = (low+high)/2; \ 128 if (rp[t] > col) high = t; \ 129 else low = t; \ 130 } \ 131 for (_i=low; _i<high; _i++) { \ 132 if (rp[_i] > col1) break; \ 133 if (rp[_i] == col1) { \ 134 if (addv == ADD_VALUES) ap[_i] += value; \ 135 else ap[_i] = value; \ 136 goto b_noinsert; \ 137 } \ 138 } \ 139 if (nonew == 1) goto b_noinsert; \ 140 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 141 if (nrow >= rmax) { \ 142 /* there is no extra room in row, therefore enlarge */ \ 143 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 144 PetscScalar *new_a; \ 145 \ 146 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 147 \ 148 /* malloc new storage space */ \ 149 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 150 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 151 new_j = (int*)(new_a + new_nz); \ 152 new_i = new_j + new_nz; \ 153 \ 154 /* copy over old data into new slots */ \ 155 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 156 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 157 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 158 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 159 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 160 len*sizeof(int));CHKERRQ(ierr); \ 161 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 163 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 164 /* free up old matrix storage */ \ 165 \ 166 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 167 if (!b->singlemalloc) { \ 168 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 169 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 170 } \ 171 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 172 b->singlemalloc = PETSC_TRUE; \ 173 \ 174 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 175 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 176 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 177 b->maxnz += CHUNKSIZE; \ 178 b->reallocs++; \ 179 } \ 180 N = nrow++ - 1; b->nz++; \ 181 /* shift up all the later entries in this row */ \ 182 for (ii=N; ii>=_i; ii--) { \ 183 rp[ii+1] = rp[ii]; \ 184 ap[ii+1] = ap[ii]; \ 185 } \ 186 rp[_i] = col1; \ 187 ap[_i] = value; \ 188 b_noinsert: ; \ 189 bilen[row] = nrow; \ 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatSetValues_MPIAIJ" 194 int MatSetValues_MPIAIJ(Mat 
mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 195 { 196 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 197 PetscScalar value; 198 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 199 int cstart = aij->cstart,cend = aij->cend,row,col; 200 PetscTruth roworiented = aij->roworiented; 201 202 /* Some Variables required in the macro */ 203 Mat A = aij->A; 204 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 205 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 206 PetscScalar *aa = a->a; 207 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 208 Mat B = aij->B; 209 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 210 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 211 PetscScalar *ba = b->a; 212 213 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 214 int nonew = a->nonew,shift=0; 215 PetscScalar *ap; 216 217 PetscFunctionBegin; 218 for (i=0; i<m; i++) { 219 if (im[i] < 0) continue; 220 #if defined(PETSC_USE_BOPT_g) 221 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 222 #endif 223 if (im[i] >= rstart && im[i] < rend) { 224 row = im[i] - rstart; 225 for (j=0; j<n; j++) { 226 if (in[j] >= cstart && in[j] < cend){ 227 col = in[j] - cstart; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 if (ignorezeroentries && value == 0.0) continue; 230 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } else if (in[j] < 0) continue; 233 #if defined(PETSC_USE_BOPT_g) 234 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 235 #endif 236 else { 237 if (mat->was_assembled) { 238 if (!aij->colmap) { 239 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 240 } 241 #if defined (PETSC_USE_CTABLE) 242 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 243 col--; 244 #else 245 col = aij->colmap[in[j]] - 1; 246 #endif 247 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 248 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 249 col = in[j]; 250 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 251 B = aij->B; 252 b = (Mat_SeqAIJ*)B->data; 253 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 254 ba = b->a; 255 } 256 } else col = in[j]; 257 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 258 if (ignorezeroentries && value == 0.0) continue; 259 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 260 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 261 } 262 } 263 } else { 264 if (!aij->donotstash) { 265 if (roworiented) { 266 if (ignorezeroentries && v[i*n] == 0.0) continue; 267 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 268 } else { 269 if (ignorezeroentries && v[i] == 0.0) continue; 270 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 271 } 272 } 273 } 274 } 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatGetValues_MPIAIJ" 280 int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 281 { 282 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 283 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 284 int cstart = aij->cstart,cend = aij->cend,row,col; 285 286 PetscFunctionBegin; 287 for (i=0; i<m; i++) { 288 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative 
row: %d",idxm[i]); 289 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 290 if (idxm[i] >= rstart && idxm[i] < rend) { 291 row = idxm[i] - rstart; 292 for (j=0; j<n; j++) { 293 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]); 294 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 295 if (idxn[j] >= cstart && idxn[j] < cend){ 296 col = idxn[j] - cstart; 297 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 298 } else { 299 if (!aij->colmap) { 300 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 301 } 302 #if defined (PETSC_USE_CTABLE) 303 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 304 col --; 305 #else 306 col = aij->colmap[idxn[j]] - 1; 307 #endif 308 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 309 else { 310 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 311 } 312 } 313 } 314 } else { 315 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 316 } 317 } 318 PetscFunctionReturn(0); 319 } 320 321 #undef __FUNCT__ 322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 324 { 325 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 326 int ierr,nstash,reallocs; 327 InsertMode addv; 328 329 PetscFunctionBegin; 330 if (aij->donotstash) { 331 PetscFunctionReturn(0); 332 } 333 334 /* make sure all processors are either in INSERTMODE or ADDMODE */ 335 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 336 if (addv == (ADD_VALUES|INSERT_VALUES)) { 337 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 338 } 339 mat->insertmode = addv; /* in case this processor had no cache */ 340 341 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 342 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 343 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 344 PetscFunctionReturn(0); 345 } 346 347 348 #undef __FUNCT__ 349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 351 { 352 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 353 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 354 int i,j,rstart,ncols,n,ierr,flg; 355 int *row,*col,other_disassembled; 356 PetscScalar *val; 357 InsertMode addv = mat->insertmode; 358 #if defined(PETSC_HAVE_MUMPS) 359 PetscTruth flag; 360 #endif 361 362 PetscFunctionBegin; 363 if (!aij->donotstash) { 364 while (1) { 365 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 366 if (!flg) break; 367 368 for (i=0; i<n;) { 369 /* Now identify the consecutive vals belonging to the same row */ 370 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 371 if (j < n) ncols = j-i; 372 else ncols = n-i; 373 /* Now assemble all these values with a single function call */ 374 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 375 i = j; 376 } 377 } 378 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 379 } 380 381 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 382 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 383 384 /* determine if any processor has disassembled, if so we must 385 also disassemble ourselfs, in order that we may reassemble. 
*/ 386 /* 387 if nonzero structure of submatrix B cannot change then we know that 388 no processor disassembled thus we can skip this stuff 389 */ 390 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 391 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 392 if (mat->was_assembled && !other_disassembled) { 393 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 394 } 395 } 396 397 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 398 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 399 } 400 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 401 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 402 403 if (aij->rowvalues) { 404 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 405 aij->rowvalues = 0; 406 } 407 408 /* used by MatAXPY() */ 409 a->xtoy = 0; b->xtoy = 0; 410 a->XtoY = 0; b->XtoY = 0; 411 412 #if defined(PETSC_HAVE_MUMPS) 413 ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr); 414 if (flag) { ierr = MatUseMUMPS_MPIAIJ(mat);CHKERRQ(ierr); } 415 #endif 416 PetscFunctionReturn(0); 417 } 418 419 #undef __FUNCT__ 420 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 421 int MatZeroEntries_MPIAIJ(Mat A) 422 { 423 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 424 int ierr; 425 426 PetscFunctionBegin; 427 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 428 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 429 PetscFunctionReturn(0); 430 } 431 432 #undef __FUNCT__ 433 #define __FUNCT__ "MatZeroRows_MPIAIJ" 434 int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 435 { 436 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 437 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 438 int *nprocs,j,idx,nsends,row; 439 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 440 int *rvalues,tag = A->tag,count,base,slen,n,*source; 441 int *lens,imdex,*lrows,*values,rstart=l->rstart; 442 MPI_Comm comm = A->comm; 443 MPI_Request *send_waits,*recv_waits; 444 MPI_Status recv_status,*send_status; 445 IS istmp; 446 PetscTruth found; 447 448 PetscFunctionBegin; 449 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 450 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 451 452 /* first count number of contributors to each processor */ 453 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 454 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 455 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 456 for (i=0; i<N; i++) { 457 idx = rows[i]; 458 found = PETSC_FALSE; 459 for (j=0; j<size; j++) { 460 if (idx >= owners[j] && idx < owners[j+1]) { 461 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 462 } 463 } 464 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 465 } 466 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 467 468 /* inform other processors of number of messages and max length*/ 469 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 470 471 /* post receives: */ 472 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 473 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 474 for (i=0; i<nrecvs; i++) { 475 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 476 } 477 478 /* do sends: 479 1) starts[i] gives the starting index in svalues for stuff going to 480 the ith processor 481 */ 482 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 483 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 484 ierr = 
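/*
   Overview of the communication pattern used by MatZeroRows_MPIAIJ() (an explanatory
   note, not part of the original source):

     1. for every requested global row the owning process is located from the
        ownership array owners[]; nprocs[2*j] counts rows destined for process j and
        owner[i] records the destination of rows[i];
     2. PetscMaxSum() tells each process how many messages it will receive (nrecvs)
        and the maximum message length (nmax), so the receives can be posted before
        any sends are started;
     3. the row numbers are packed by destination into svalues and sent with
        MPI_Isend(), drained with MPI_Waitany(), and finally converted to local row
        indices by subtracting the ownership offset "base".
*/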
PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 485 starts[0] = 0; 486 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 487 for (i=0; i<N; i++) { 488 svalues[starts[owner[i]]++] = rows[i]; 489 } 490 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 491 492 starts[0] = 0; 493 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 494 count = 0; 495 for (i=0; i<size; i++) { 496 if (nprocs[2*i+1]) { 497 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 498 } 499 } 500 ierr = PetscFree(starts);CHKERRQ(ierr); 501 502 base = owners[rank]; 503 504 /* wait on receives */ 505 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 506 source = lens + nrecvs; 507 count = nrecvs; slen = 0; 508 while (count) { 509 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 510 /* unpack receives into our local space */ 511 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 512 source[imdex] = recv_status.MPI_SOURCE; 513 lens[imdex] = n; 514 slen += n; 515 count--; 516 } 517 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 518 519 /* move the data into the send scatter */ 520 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 521 count = 0; 522 for (i=0; i<nrecvs; i++) { 523 values = rvalues + i*nmax; 524 for (j=0; j<lens[i]; j++) { 525 lrows[count++] = values[j] - base; 526 } 527 } 528 ierr = PetscFree(rvalues);CHKERRQ(ierr); 529 ierr = PetscFree(lens);CHKERRQ(ierr); 530 ierr = PetscFree(owner);CHKERRQ(ierr); 531 ierr = PetscFree(nprocs);CHKERRQ(ierr); 532 533 /* actually zap the local rows */ 534 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 535 PetscLogObjectParent(A,istmp); 536 537 /* 538 Zero the required rows. If the "diagonal block" of the matrix 539 is square and the user wishes to set the diagonal we use seperate 540 code so that MatSetValues() is not called for each diagonal allocating 541 new memory, thus calling lots of mallocs and slowing things down. 
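
     For reference, a hedged sketch of the calling side (not part of the original
     source; the row numbers are illustrative): each process lists global rows to be
     zeroed, which need not be rows it owns, and may supply a value to be placed on
     the diagonal of the zeroed rows.

       IS          rowsToZero;
       int         ierr,rows[2] = {0,5};
       PetscScalar one = 1.0;

       ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&rowsToZero);CHKERRQ(ierr);
       ierr = MatZeroRows(A,rowsToZero,&one);CHKERRQ(ierr);
       ierr = ISDestroy(rowsToZero);CHKERRQ(ierr);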
542 543 Contributed by: Mathew Knepley 544 */ 545 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 546 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 547 if (diag && (l->A->M == l->A->N)) { 548 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 549 } else if (diag) { 550 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 551 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 552 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 553 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 554 } 555 for (i = 0; i < slen; i++) { 556 row = lrows[i] + rstart; 557 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 558 } 559 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 560 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 561 } else { 562 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 563 } 564 ierr = ISDestroy(istmp);CHKERRQ(ierr); 565 ierr = PetscFree(lrows);CHKERRQ(ierr); 566 567 /* wait on sends */ 568 if (nsends) { 569 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 570 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 571 ierr = PetscFree(send_status);CHKERRQ(ierr); 572 } 573 ierr = PetscFree(send_waits);CHKERRQ(ierr); 574 ierr = PetscFree(svalues);CHKERRQ(ierr); 575 576 PetscFunctionReturn(0); 577 } 578 579 #undef __FUNCT__ 580 #define __FUNCT__ "MatMult_MPIAIJ" 581 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 582 { 583 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 584 int ierr,nt; 585 586 PetscFunctionBegin; 587 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 588 if (nt != A->n) { 589 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 590 } 591 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 592 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 593 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 594 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 595 PetscFunctionReturn(0); 596 } 597 598 #undef __FUNCT__ 599 #define __FUNCT__ "MatMultAdd_MPIAIJ" 600 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 601 { 602 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 603 int ierr; 604 605 PetscFunctionBegin; 606 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 607 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 608 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 609 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 610 PetscFunctionReturn(0); 611 } 612 613 #undef __FUNCT__ 614 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 615 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 616 { 617 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 618 int ierr; 619 620 PetscFunctionBegin; 621 /* do nondiagonal part */ 622 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 623 /* send it on its way */ 624 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 625 /* do local part */ 626 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 627 /* receive remote parts: note this assumes the values are not actually */ 628 /* inserted in yy until the next line, which is true for my implementation*/ 629 /* but is not perhaps always true. 
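
     To summarize the multiply kernels above (an explanatory sketch, not part of the
     original source): the parallel product y = A x is formed as

         y = A_diag x_local + B_off x_ghost

     where x_ghost (a->lvec) is filled by the scatter a->Mvctx.  The scatter is begun
     before the diagonal-block multiply and completed just before the off-diagonal
     multiply-add, so the communication is overlapped with local computation.  In
     terms of the public interface the sequence is

       ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
       ierr = MatMult(a->A,xx,yy);CHKERRQ(ierr);
       ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
       ierr = MatMultAdd(a->B,a->lvec,yy,yy);CHKERRQ(ierr);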
*/ 630 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 631 PetscFunctionReturn(0); 632 } 633 634 EXTERN_C_BEGIN 635 #undef __FUNCT__ 636 #define __FUNCT__ "MatIsSymmetric_MPIAIJ" 637 int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f) 638 { 639 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 640 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 641 IS Me,Notme; 642 int M,N,first,last,*notme,i, ierr; 643 644 PetscFunctionBegin; 645 646 /* Easy test: symmetric diagonal block */ 647 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 648 ierr = MatIsSymmetric(Adia,Bdia,f); CHKERRQ(ierr); 649 if (!*f) PetscFunctionReturn(0); 650 651 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 652 ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr); 653 ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr); 654 ierr = PetscMalloc((N-last+first)*sizeof(int),¬me); CHKERRQ(ierr); 655 for (i=0; i<first; i++) notme[i] = i; 656 for (i=last; i<M; i++) notme[i-last+first] = i; 657 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr); 658 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr); 659 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs); CHKERRQ(ierr); 660 Aoff = Aoffs[0]; 661 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs); CHKERRQ(ierr); 662 Boff = Boffs[0]; 663 ierr = MatIsSymmetric(Aoff,Boff,f); CHKERRQ(ierr); 664 ierr = MatDestroyMatrices(1,&Aoffs); CHKERRQ(ierr); 665 ierr = MatDestroyMatrices(1,&Boffs); CHKERRQ(ierr); 666 ierr = ISDestroy(Me); CHKERRQ(ierr); 667 ierr = ISDestroy(Notme); CHKERRQ(ierr); 668 669 PetscFunctionReturn(0); 670 } 671 EXTERN_C_END 672 673 #undef __FUNCT__ 674 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 675 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 676 { 677 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 678 int ierr; 679 680 PetscFunctionBegin; 681 /* do nondiagonal part */ 682 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 683 /* send it on its way */ 684 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 685 /* do local part */ 686 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 687 /* receive remote parts: note this assumes the values are not actually */ 688 /* inserted in yy until the next line, which is true for my implementation*/ 689 /* but is not perhaps always true. 
*/ 690 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 691 PetscFunctionReturn(0); 692 } 693 694 /* 695 This only works correctly for square matrices where the subblock A->A is the 696 diagonal block 697 */ 698 #undef __FUNCT__ 699 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 700 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 701 { 702 int ierr; 703 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 704 705 PetscFunctionBegin; 706 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 707 if (a->rstart != a->cstart || a->rend != a->cend) { 708 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 709 } 710 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 711 PetscFunctionReturn(0); 712 } 713 714 #undef __FUNCT__ 715 #define __FUNCT__ "MatScale_MPIAIJ" 716 int MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 717 { 718 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 719 int ierr; 720 721 PetscFunctionBegin; 722 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 723 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 724 PetscFunctionReturn(0); 725 } 726 727 #undef __FUNCT__ 728 #define __FUNCT__ "MatDestroy_MPIAIJ" 729 int MatDestroy_MPIAIJ(Mat mat) 730 { 731 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 732 int ierr; 733 734 PetscFunctionBegin; 735 #if defined(PETSC_USE_LOG) 736 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 737 #endif 738 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 739 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 740 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 741 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 742 #if defined (PETSC_USE_CTABLE) 743 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 744 #else 745 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 746 #endif 747 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 748 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 749 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 750 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 751 ierr = PetscFree(aij);CHKERRQ(ierr); 752 PetscFunctionReturn(0); 753 } 754 755 extern int MatFactorInfo_MUMPS(Mat,PetscViewer); 756 757 #undef __FUNCT__ 758 #define __FUNCT__ "MatView_MPIAIJ_Binary" 759 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 760 { 761 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 762 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 763 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 764 int nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag; 765 int nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 766 PetscScalar *column_values; 767 768 PetscFunctionBegin; 769 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 770 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 771 nz = A->nz + B->nz; 772 if (rank == 0) { 773 header[0] = MAT_FILE_COOKIE; 774 header[1] = mat->M; 775 header[2] = mat->N; 776 ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 777 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 778 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr); 779 /* get largest number of rows any processor has */ 780 rlen = mat->m; 781 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 782 for (i=1; i<size; i++) { 783 rlen = PetscMax(rlen,range[i+1] - range[i]); 784 } 785 } else { 786 ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 787 rlen = mat->m; 788 } 789 790 /* 
     load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr =
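/*
   For orientation, the layout of the binary file produced by this routine, as implied
   by the writes above (a descriptive note, not part of the original source); the
   integer/scalar sizes are those used by PetscBinaryWrite():

     int    header[4]  :  MAT_FILE_COOKIE, M, N, total number of nonzeros
     int    rowlen[M]  :  number of nonzeros in each global row
     int    colidx[nz] :  global column indices, row by row, each row sorted
     scalar values[nz] :  numerical values in the same order as colidx

   Process 0 writes its own data and then receives and writes every other process's
   row lengths, indices and values in rank order, so the result matches the standard
   PETSc binary matrix format.
*/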
PetscFree(column_values);CHKERRQ(ierr); 877 PetscFunctionReturn(0); 878 } 879 880 #undef __FUNCT__ 881 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 882 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 883 { 884 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 885 int ierr,rank = aij->rank,size = aij->size; 886 PetscTruth isdraw,isascii,flg,isbinary; 887 PetscViewer sviewer; 888 PetscViewerFormat format; 889 890 PetscFunctionBegin; 891 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 892 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 893 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 894 if (isascii) { 895 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 896 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 897 MatInfo info; 898 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 899 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 900 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 901 if (flg) { 902 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 903 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 904 } else { 905 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 906 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 907 } 908 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 909 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 910 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 911 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 912 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 913 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 914 PetscFunctionReturn(0); 915 } else if (format == PETSC_VIEWER_ASCII_INFO) { 916 PetscFunctionReturn(0); 917 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 918 #if defined(PETSC_HAVE_MUMPS) && !defined(PETSC_USE_SINGLE) 919 ierr = MatFactorInfo_MUMPS(mat,viewer);CHKERRQ(ierr); 920 #endif 921 PetscFunctionReturn(0); 922 } 923 } else if (isbinary) { 924 if (size == 1) { 925 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 926 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 927 } else { 928 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 929 } 930 PetscFunctionReturn(0); 931 } else if (isdraw) { 932 PetscDraw draw; 933 PetscTruth isnull; 934 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 935 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 936 } 937 938 if (size == 1) { 939 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 940 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 941 } else { 942 /* assemble the entire matrix onto first processor. 
*/ 943 Mat A; 944 Mat_SeqAIJ *Aloc; 945 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 946 PetscScalar *a; 947 948 if (!rank) { 949 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 950 } else { 951 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 952 } 953 PetscLogObjectParent(mat,A); 954 955 /* copy over the A part */ 956 Aloc = (Mat_SeqAIJ*)aij->A->data; 957 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 958 row = aij->rstart; 959 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 960 for (i=0; i<m; i++) { 961 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 962 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 963 } 964 aj = Aloc->j; 965 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 966 967 /* copy over the B part */ 968 Aloc = (Mat_SeqAIJ*)aij->B->data; 969 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 970 row = aij->rstart; 971 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 972 ct = cols; 973 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 974 for (i=0; i<m; i++) { 975 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 976 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 977 } 978 ierr = PetscFree(ct);CHKERRQ(ierr); 979 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 980 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 981 /* 982 Everyone has to call to draw the matrix since the graphics waits are 983 synchronized across all processors that share the PetscDraw object 984 */ 985 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 986 if (!rank) { 987 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 988 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 989 } 990 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 991 ierr = MatDestroy(A);CHKERRQ(ierr); 992 } 993 PetscFunctionReturn(0); 994 } 995 996 #undef __FUNCT__ 997 #define __FUNCT__ "MatView_MPIAIJ" 998 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 999 { 1000 int ierr; 1001 PetscTruth isascii,isdraw,issocket,isbinary; 1002 1003 PetscFunctionBegin; 1004 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 1005 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1006 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1007 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1008 if (isascii || isdraw || isbinary || issocket) { 1009 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1010 } else { 1011 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1012 } 1013 PetscFunctionReturn(0); 1014 } 1015 1016 1017 1018 #undef __FUNCT__ 1019 #define __FUNCT__ "MatRelax_MPIAIJ" 1020 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1021 { 1022 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1023 int ierr; 1024 Vec bb1; 1025 PetscScalar mone=-1.0; 1026 1027 PetscFunctionBegin; 1028 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1029 1030 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1031 1032 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1033 if (flag & SOR_ZERO_INITIAL_GUESS) { 1034 ierr = 
(*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1035 its--; 1036 } 1037 1038 while (its--) { 1039 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1040 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1041 1042 /* update rhs: bb1 = bb - B*x */ 1043 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1044 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1045 1046 /* local sweep */ 1047 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1048 CHKERRQ(ierr); 1049 } 1050 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1051 if (flag & SOR_ZERO_INITIAL_GUESS) { 1052 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1053 its--; 1054 } 1055 while (its--) { 1056 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1057 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1058 1059 /* update rhs: bb1 = bb - B*x */ 1060 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1061 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1062 1063 /* local sweep */ 1064 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1065 CHKERRQ(ierr); 1066 } 1067 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1068 if (flag & SOR_ZERO_INITIAL_GUESS) { 1069 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1070 its--; 1071 } 1072 while (its--) { 1073 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1074 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1075 1076 /* update rhs: bb1 = bb - B*x */ 1077 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1078 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1079 1080 /* local sweep */ 1081 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1082 CHKERRQ(ierr); 1083 } 1084 } else { 1085 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1086 } 1087 1088 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1089 PetscFunctionReturn(0); 1090 } 1091 1092 #undef __FUNCT__ 1093 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1094 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1095 { 1096 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1097 Mat A = mat->A,B = mat->B; 1098 int ierr; 1099 PetscReal isend[5],irecv[5]; 1100 1101 PetscFunctionBegin; 1102 info->block_size = 1.0; 1103 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1104 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1105 isend[3] = info->memory; isend[4] = info->mallocs; 1106 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1107 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1108 isend[3] += info->memory; isend[4] += info->mallocs; 1109 if (flag == MAT_LOCAL) { 1110 info->nz_used = isend[0]; 1111 info->nz_allocated = isend[1]; 1112 info->nz_unneeded = isend[2]; 1113 info->memory = isend[3]; 1114 info->mallocs = isend[4]; 1115 } else if (flag == MAT_GLOBAL_MAX) { 1116 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1117 info->nz_used = irecv[0]; 1118 info->nz_allocated = irecv[1]; 1119 info->nz_unneeded = irecv[2]; 1120 info->memory = irecv[3]; 1121 info->mallocs = irecv[4]; 1122 } else if (flag == 
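/*
   Note on MatRelax_MPIAIJ() above (an explanatory sketch, not part of the original
   source): the SOR_LOCAL_* variants implement a block-Jacobi outer iteration with a
   sequential SOR sweep on each process's diagonal block.  Per outer iteration and per
   process,

       x_ghost <- scatter of the current x            (mat->lvec via mat->Mvctx)
       bb1     <- bb - B_off x_ghost                  (VecScale by -1.0, then multadd)
       x       <- SOR sweep of A_diag applied to bb1  (forward, backward or symmetric)

   so coupling to off-process unknowns enters only through the updated right-hand side
   bb1, never through the sweep itself; a fully coupled parallel SOR is not provided,
   hence the PETSC_ERR_SUP for the non-local flags.
*/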
MAT_GLOBAL_SUM) { 1123 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1124 info->nz_used = irecv[0]; 1125 info->nz_allocated = irecv[1]; 1126 info->nz_unneeded = irecv[2]; 1127 info->memory = irecv[3]; 1128 info->mallocs = irecv[4]; 1129 } 1130 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1131 info->fill_ratio_needed = 0; 1132 info->factor_mallocs = 0; 1133 info->rows_global = (double)matin->M; 1134 info->columns_global = (double)matin->N; 1135 info->rows_local = (double)matin->m; 1136 info->columns_local = (double)matin->N; 1137 1138 PetscFunctionReturn(0); 1139 } 1140 1141 #undef __FUNCT__ 1142 #define __FUNCT__ "MatSetOption_MPIAIJ" 1143 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1144 { 1145 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1146 int ierr; 1147 1148 PetscFunctionBegin; 1149 switch (op) { 1150 case MAT_NO_NEW_NONZERO_LOCATIONS: 1151 case MAT_YES_NEW_NONZERO_LOCATIONS: 1152 case MAT_COLUMNS_UNSORTED: 1153 case MAT_COLUMNS_SORTED: 1154 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1155 case MAT_KEEP_ZEROED_ROWS: 1156 case MAT_NEW_NONZERO_LOCATION_ERR: 1157 case MAT_USE_INODES: 1158 case MAT_DO_NOT_USE_INODES: 1159 case MAT_IGNORE_ZERO_ENTRIES: 1160 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1161 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1162 break; 1163 case MAT_ROW_ORIENTED: 1164 a->roworiented = PETSC_TRUE; 1165 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1166 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1167 break; 1168 case MAT_ROWS_SORTED: 1169 case MAT_ROWS_UNSORTED: 1170 case MAT_YES_NEW_DIAGONALS: 1171 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1172 break; 1173 case MAT_COLUMN_ORIENTED: 1174 a->roworiented = PETSC_FALSE; 1175 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1176 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1177 break; 1178 case MAT_IGNORE_OFF_PROC_ENTRIES: 1179 a->donotstash = PETSC_TRUE; 1180 break; 1181 case MAT_NO_NEW_DIAGONALS: 1182 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1183 default: 1184 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1185 } 1186 PetscFunctionReturn(0); 1187 } 1188 1189 #undef __FUNCT__ 1190 #define __FUNCT__ "MatGetRow_MPIAIJ" 1191 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1192 { 1193 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1194 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1195 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1196 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1197 int *cmap,*idx_p; 1198 1199 PetscFunctionBegin; 1200 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1201 mat->getrowactive = PETSC_TRUE; 1202 1203 if (!mat->rowvalues && (idx || v)) { 1204 /* 1205 allocate enough space to hold information from the longest row. 
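
      Typical use of this routine from the public interface (a hedged sketch, not part
      of the original source): only locally owned rows may be requested, each
      MatGetRow() must be paired with MatRestoreRow(), and the returned arrays point at
      the work space allocated below, so they are valid only until the row is restored.

        int         ierr,row,rstart,rend,ncols,*cols;
        PetscScalar *vals;

        ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
        for (row=rstart; row<rend; row++) {
          ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
          ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
        }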
1206 */ 1207 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1208 int max = 1,tmp; 1209 for (i=0; i<matin->m; i++) { 1210 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1211 if (max < tmp) { max = tmp; } 1212 } 1213 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1214 mat->rowindices = (int*)(mat->rowvalues + max); 1215 } 1216 1217 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1218 lrow = row - rstart; 1219 1220 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1221 if (!v) {pvA = 0; pvB = 0;} 1222 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1223 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1224 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1225 nztot = nzA + nzB; 1226 1227 cmap = mat->garray; 1228 if (v || idx) { 1229 if (nztot) { 1230 /* Sort by increasing column numbers, assuming A and B already sorted */ 1231 int imark = -1; 1232 if (v) { 1233 *v = v_p = mat->rowvalues; 1234 for (i=0; i<nzB; i++) { 1235 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1236 else break; 1237 } 1238 imark = i; 1239 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1240 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1241 } 1242 if (idx) { 1243 *idx = idx_p = mat->rowindices; 1244 if (imark > -1) { 1245 for (i=0; i<imark; i++) { 1246 idx_p[i] = cmap[cworkB[i]]; 1247 } 1248 } else { 1249 for (i=0; i<nzB; i++) { 1250 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1251 else break; 1252 } 1253 imark = i; 1254 } 1255 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1256 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1257 } 1258 } else { 1259 if (idx) *idx = 0; 1260 if (v) *v = 0; 1261 } 1262 } 1263 *nz = nztot; 1264 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1265 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1266 PetscFunctionReturn(0); 1267 } 1268 1269 #undef __FUNCT__ 1270 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1271 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1272 { 1273 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1274 1275 PetscFunctionBegin; 1276 if (aij->getrowactive == PETSC_FALSE) { 1277 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1278 } 1279 aij->getrowactive = PETSC_FALSE; 1280 PetscFunctionReturn(0); 1281 } 1282 1283 #undef __FUNCT__ 1284 #define __FUNCT__ "MatNorm_MPIAIJ" 1285 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1286 { 1287 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1288 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1289 int ierr,i,j,cstart = aij->cstart; 1290 PetscReal sum = 0.0; 1291 PetscScalar *v; 1292 1293 PetscFunctionBegin; 1294 if (aij->size == 1) { 1295 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1296 } else { 1297 if (type == NORM_FROBENIUS) { 1298 v = amat->a; 1299 for (i=0; i<amat->nz; i++) { 1300 #if defined(PETSC_USE_COMPLEX) 1301 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1302 #else 1303 sum += (*v)*(*v); v++; 1304 #endif 1305 } 1306 v = bmat->a; 1307 for (i=0; i<bmat->nz; i++) { 1308 #if defined(PETSC_USE_COMPLEX) 1309 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1310 #else 1311 sum += (*v)*(*v); v++; 1312 #endif 1313 } 1314 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1315 *norm = sqrt(*norm); 1316 } else if (type == NORM_1) { /* max column norm */ 1317 PetscReal *tmp,*tmp2; 1318 int 
*jj,*garray = aij->garray; 1319 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1320 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1321 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1322 *norm = 0.0; 1323 v = amat->a; jj = amat->j; 1324 for (j=0; j<amat->nz; j++) { 1325 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1326 } 1327 v = bmat->a; jj = bmat->j; 1328 for (j=0; j<bmat->nz; j++) { 1329 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1330 } 1331 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1332 for (j=0; j<mat->N; j++) { 1333 if (tmp2[j] > *norm) *norm = tmp2[j]; 1334 } 1335 ierr = PetscFree(tmp);CHKERRQ(ierr); 1336 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1337 } else if (type == NORM_INFINITY) { /* max row norm */ 1338 PetscReal ntemp = 0.0; 1339 for (j=0; j<aij->A->m; j++) { 1340 v = amat->a + amat->i[j]; 1341 sum = 0.0; 1342 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1343 sum += PetscAbsScalar(*v); v++; 1344 } 1345 v = bmat->a + bmat->i[j]; 1346 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1347 sum += PetscAbsScalar(*v); v++; 1348 } 1349 if (sum > ntemp) ntemp = sum; 1350 } 1351 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1352 } else { 1353 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1354 } 1355 } 1356 PetscFunctionReturn(0); 1357 } 1358 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "MatTranspose_MPIAIJ" 1361 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1362 { 1363 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1364 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1365 int ierr; 1366 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1367 Mat B; 1368 PetscScalar *array; 1369 1370 PetscFunctionBegin; 1371 if (!matout && M != N) { 1372 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1373 } 1374 1375 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1376 1377 /* copy over the A part */ 1378 Aloc = (Mat_SeqAIJ*)a->A->data; 1379 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1380 row = a->rstart; 1381 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1382 for (i=0; i<m; i++) { 1383 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1384 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1385 } 1386 aj = Aloc->j; 1387 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1388 1389 /* copy over the B part */ 1390 Aloc = (Mat_SeqAIJ*)a->B->data; 1391 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1392 row = a->rstart; 1393 ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr); 1394 ct = cols; 1395 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1396 for (i=0; i<m; i++) { 1397 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1398 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1399 } 1400 ierr = PetscFree(ct);CHKERRQ(ierr); 1401 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1402 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1403 if (matout) { 1404 *matout = B; 1405 } else { 1406 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1407 } 1408 PetscFunctionReturn(0); 1409 } 1410 1411 #undef __FUNCT__ 1412 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1413 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1414 { 1415 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1416 Mat a = aij->A,b = aij->B; 1417 int ierr,s1,s2,s3; 1418 1419 PetscFunctionBegin; 1420 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1421 if 
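/*
   Note on MatNorm_MPIAIJ() above (an explanatory note, not part of the original
   source): each process accumulates the contribution of its diagonal and off-diagonal
   blocks and a single reduction combines them,

     Frobenius :  local sum of |a_ij|^2, MPI_SUM reduction, square root of the total
     1-norm    :  local column sums of |a_ij| in a length-N work array, MPI_SUM
                  reduction of the whole array, then the maximum entry
     infinity  :  local maximum row sum (rows are never split across processes),
                  combined with an MPI_MAX reduction

   The 2-norm is not supported since it would require a singular value computation.
*/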
(rr) { 1422 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1423 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1424 /* Overlap communication with computation. */ 1425 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1426 } 1427 if (ll) { 1428 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1429 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1430 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1431 } 1432 /* scale the diagonal block */ 1433 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1434 1435 if (rr) { 1436 /* Do a scatter end and then right scale the off-diagonal block */ 1437 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1438 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1439 } 1440 1441 PetscFunctionReturn(0); 1442 } 1443 1444 1445 #undef __FUNCT__ 1446 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1447 int MatPrintHelp_MPIAIJ(Mat A) 1448 { 1449 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1450 int ierr; 1451 1452 PetscFunctionBegin; 1453 if (!a->rank) { 1454 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1455 } 1456 PetscFunctionReturn(0); 1457 } 1458 1459 #undef __FUNCT__ 1460 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1461 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1462 { 1463 PetscFunctionBegin; 1464 *bs = 1; 1465 PetscFunctionReturn(0); 1466 } 1467 #undef __FUNCT__ 1468 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1469 int MatSetUnfactored_MPIAIJ(Mat A) 1470 { 1471 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1472 int ierr; 1473 1474 PetscFunctionBegin; 1475 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1476 PetscFunctionReturn(0); 1477 } 1478 1479 #undef __FUNCT__ 1480 #define __FUNCT__ "MatEqual_MPIAIJ" 1481 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1482 { 1483 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1484 Mat a,b,c,d; 1485 PetscTruth flg; 1486 int ierr; 1487 1488 PetscFunctionBegin; 1489 a = matA->A; b = matA->B; 1490 c = matB->A; d = matB->B; 1491 1492 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1493 if (flg == PETSC_TRUE) { 1494 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1495 } 1496 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1497 PetscFunctionReturn(0); 1498 } 1499 1500 #undef __FUNCT__ 1501 #define __FUNCT__ "MatCopy_MPIAIJ" 1502 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1503 { 1504 int ierr; 1505 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1506 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1507 1508 PetscFunctionBegin; 1509 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1510 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1511 /* because of the column compression in the off-processor part of the matrix a->B, 1512 the number of columns in a->B and b->B may be different, hence we cannot call 1513 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1514 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1515 then copying the submatrices */ 1516 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1517 } else { 1518 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1519 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1520 } 1521 PetscFunctionReturn(0); 1522 } 1523 1524 #undef __FUNCT__ 1525 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1526 int MatSetUpPreallocation_MPIAIJ(Mat A) 1527 { 1528 int ierr; 1529 1530 PetscFunctionBegin; 1531 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1532 PetscFunctionReturn(0); 1533 } 1534 1535 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1536 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1537 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1538 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,const IS[],const IS[],MatReuse,Mat *[]); 1539 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1540 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1541 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*); 1542 #endif 1543 1544 #include "petscblaslapack.h" 1545 extern int MatAXPY_SeqAIJ(const PetscScalar[],Mat,Mat,MatStructure); 1546 #undef __FUNCT__ 1547 #define __FUNCT__ "MatAXPY_MPIAIJ" 1548 int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1549 { 1550 int ierr,one=1,i; 1551 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1552 Mat_SeqAIJ *x,*y; 1553 1554 PetscFunctionBegin; 1555 if (str == SAME_NONZERO_PATTERN) { 1556 x = (Mat_SeqAIJ *)xx->A->data; 1557 y = (Mat_SeqAIJ *)yy->A->data; 1558 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1559 x = (Mat_SeqAIJ *)xx->B->data; 1560 y = (Mat_SeqAIJ *)yy->B->data; 1561 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1562 } else if (str == SUBSET_NONZERO_PATTERN) { 1563 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1564 1565 x = (Mat_SeqAIJ *)xx->B->data; 1566 y = (Mat_SeqAIJ *)yy->B->data; 1567 if (y->xtoy && y->XtoY != xx->B) { 1568 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1569 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1570 } 1571 if (!y->xtoy) { /* get xtoy */ 1572 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1573 y->XtoY = xx->B; 1574 } 1575 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1576 } else { 1577 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1578 } 1579 PetscFunctionReturn(0); 1580 } 1581 1582 /* -------------------------------------------------------------------*/ 1583 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1584 MatGetRow_MPIAIJ, 1585 MatRestoreRow_MPIAIJ, 1586 MatMult_MPIAIJ, 1587 MatMultAdd_MPIAIJ, 1588 MatMultTranspose_MPIAIJ, 1589 MatMultTransposeAdd_MPIAIJ, 1590 0, 1591 0, 1592 0, 1593 0, 1594 0, 1595 0, 1596 MatRelax_MPIAIJ, 1597 MatTranspose_MPIAIJ, 1598 MatGetInfo_MPIAIJ, 1599 MatEqual_MPIAIJ, 1600 MatGetDiagonal_MPIAIJ, 1601 MatDiagonalScale_MPIAIJ, 1602 MatNorm_MPIAIJ, 1603 MatAssemblyBegin_MPIAIJ, 1604 MatAssemblyEnd_MPIAIJ, 1605 0, 1606 MatSetOption_MPIAIJ, 1607 MatZeroEntries_MPIAIJ, 1608 MatZeroRows_MPIAIJ, 1609 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1610 MatLUFactorSymbolic_MPIAIJ_TFS, 1611 #else 1612 0, 1613 #endif 1614 0, 1615 0, 1616 0, 1617 MatSetUpPreallocation_MPIAIJ, 1618 0, 1619 0, 1620 0, 1621 0, 1622 MatDuplicate_MPIAIJ, 1623 0, 1624 0, 1625 0, 1626 0, 1627 MatAXPY_MPIAIJ, 
1628 MatGetSubMatrices_MPIAIJ, 1629 MatIncreaseOverlap_MPIAIJ, 1630 MatGetValues_MPIAIJ, 1631 MatCopy_MPIAIJ, 1632 MatPrintHelp_MPIAIJ, 1633 MatScale_MPIAIJ, 1634 0, 1635 0, 1636 0, 1637 MatGetBlockSize_MPIAIJ, 1638 0, 1639 0, 1640 0, 1641 0, 1642 MatFDColoringCreate_MPIAIJ, 1643 0, 1644 MatSetUnfactored_MPIAIJ, 1645 0, 1646 0, 1647 MatGetSubMatrix_MPIAIJ, 1648 MatDestroy_MPIAIJ, 1649 MatView_MPIAIJ, 1650 MatGetPetscMaps_Petsc, 1651 0, 1652 0, 1653 0, 1654 0, 1655 0, 1656 0, 1657 0, 1658 0, 1659 MatSetColoring_MPIAIJ, 1660 MatSetValuesAdic_MPIAIJ, 1661 MatSetValuesAdifor_MPIAIJ 1662 }; 1663 1664 /* ----------------------------------------------------------------------------------------*/ 1665 1666 EXTERN_C_BEGIN 1667 #undef __FUNCT__ 1668 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1669 int MatStoreValues_MPIAIJ(Mat mat) 1670 { 1671 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1672 int ierr; 1673 1674 PetscFunctionBegin; 1675 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1676 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1677 PetscFunctionReturn(0); 1678 } 1679 EXTERN_C_END 1680 1681 EXTERN_C_BEGIN 1682 #undef __FUNCT__ 1683 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1684 int MatRetrieveValues_MPIAIJ(Mat mat) 1685 { 1686 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1687 int ierr; 1688 1689 PetscFunctionBegin; 1690 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1691 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1692 PetscFunctionReturn(0); 1693 } 1694 EXTERN_C_END 1695 1696 #include "petscpc.h" 1697 EXTERN_C_BEGIN 1698 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1699 EXTERN int MatDiagonalScaleLocal_MPIAIJ(Mat,Vec); 1700 EXTERN_C_END 1701 1702 EXTERN_C_BEGIN 1703 #undef __FUNCT__ 1704 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1705 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 1706 { 1707 Mat_MPIAIJ *b; 1708 int ierr,i; 1709 1710 PetscFunctionBegin; 1711 B->preallocated = PETSC_TRUE; 1712 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1713 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1714 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1715 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1716 if (d_nnz) { 1717 for (i=0; i<B->m; i++) { 1718 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 1719 } 1720 } 1721 if (o_nnz) { 1722 for (i=0; i<B->m; i++) { 1723 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 1724 } 1725 } 1726 b = (Mat_MPIAIJ*)B->data; 1727 1728 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1729 PetscLogObjectParent(B,b->A); 1730 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1731 PetscLogObjectParent(B,b->B); 1732 1733 PetscFunctionReturn(0); 1734 } 1735 EXTERN_C_END 1736 1737 EXTERN_C_BEGIN 1738 #undef __FUNCT__ 1739 #define __FUNCT__ "MatCreate_MPIAIJ" 1740 int MatCreate_MPIAIJ(Mat B) 1741 { 1742 Mat_MPIAIJ *b; 1743 int ierr,i,size; 1744 1745 PetscFunctionBegin; 1746 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1747 1748 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1749 B->data = (void*)b; 1750 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1751 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1752 B->factor = 0; 1753 
B->assembled = PETSC_FALSE; 1754 B->mapping = 0; 1755 1756 B->insertmode = NOT_SET_VALUES; 1757 b->size = size; 1758 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1759 1760 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1761 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1762 1763 /* the information in the maps duplicates the information computed below, eventually 1764 we should remove the duplicate information that is not contained in the maps */ 1765 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1766 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1767 1768 /* build local table of row and column ownerships */ 1769 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1770 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1771 b->cowners = b->rowners + b->size + 2; 1772 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1773 b->rowners[0] = 0; 1774 for (i=2; i<=b->size; i++) { 1775 b->rowners[i] += b->rowners[i-1]; 1776 } 1777 b->rstart = b->rowners[b->rank]; 1778 b->rend = b->rowners[b->rank+1]; 1779 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1780 b->cowners[0] = 0; 1781 for (i=2; i<=b->size; i++) { 1782 b->cowners[i] += b->cowners[i-1]; 1783 } 1784 b->cstart = b->cowners[b->rank]; 1785 b->cend = b->cowners[b->rank+1]; 1786 1787 /* build cache for off array entries formed */ 1788 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1789 b->donotstash = PETSC_FALSE; 1790 b->colmap = 0; 1791 b->garray = 0; 1792 b->roworiented = PETSC_TRUE; 1793 1794 /* stuff used for matrix vector multiply */ 1795 b->lvec = PETSC_NULL; 1796 b->Mvctx = PETSC_NULL; 1797 1798 /* stuff for MatGetRow() */ 1799 b->rowindices = 0; 1800 b->rowvalues = 0; 1801 b->getrowactive = PETSC_FALSE; 1802 1803 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1804 "MatStoreValues_MPIAIJ", 1805 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1806 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1807 "MatRetrieveValues_MPIAIJ", 1808 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1809 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1810 "MatGetDiagonalBlock_MPIAIJ", 1811 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1812 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C", 1813 "MatIsSymmetric_MPIAIJ", 1814 MatIsSymmetric_MPIAIJ); CHKERRQ(ierr); 1815 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 1816 "MatMPIAIJSetPreallocation_MPIAIJ", 1817 MatMPIAIJSetPreallocation_MPIAIJ); CHKERRQ(ierr); 1818 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 1819 "MatDiagonalScaleLocal_MPIAIJ", 1820 MatDiagonalScaleLocal_MPIAIJ); CHKERRQ(ierr); 1821 PetscFunctionReturn(0); 1822 } 1823 EXTERN_C_END 1824 1825 #undef __FUNCT__ 1826 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1827 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1828 { 1829 Mat mat; 1830 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1831 int ierr; 1832 1833 PetscFunctionBegin; 1834 *newmat = 0; 1835 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1836 ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1837 a = (Mat_MPIAIJ*)mat->data; 1838 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1839 
mat->factor = matin->factor; 1840 mat->assembled = PETSC_TRUE; 1841 mat->insertmode = NOT_SET_VALUES; 1842 mat->preallocated = PETSC_TRUE; 1843 1844 a->rstart = oldmat->rstart; 1845 a->rend = oldmat->rend; 1846 a->cstart = oldmat->cstart; 1847 a->cend = oldmat->cend; 1848 a->size = oldmat->size; 1849 a->rank = oldmat->rank; 1850 a->donotstash = oldmat->donotstash; 1851 a->roworiented = oldmat->roworiented; 1852 a->rowindices = 0; 1853 a->rowvalues = 0; 1854 a->getrowactive = PETSC_FALSE; 1855 1856 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1857 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1858 if (oldmat->colmap) { 1859 #if defined (PETSC_USE_CTABLE) 1860 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1861 #else 1862 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1863 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1864 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1865 #endif 1866 } else a->colmap = 0; 1867 if (oldmat->garray) { 1868 int len; 1869 len = oldmat->B->n; 1870 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1871 PetscLogObjectMemory(mat,len*sizeof(int)); 1872 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1873 } else a->garray = 0; 1874 1875 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1876 PetscLogObjectParent(mat,a->lvec); 1877 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1878 PetscLogObjectParent(mat,a->Mvctx); 1879 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1880 PetscLogObjectParent(mat,a->A); 1881 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1882 PetscLogObjectParent(mat,a->B); 1883 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1884 *newmat = mat; 1885 PetscFunctionReturn(0); 1886 } 1887 1888 #include "petscsys.h" 1889 1890 EXTERN_C_BEGIN 1891 #undef __FUNCT__ 1892 #define __FUNCT__ "MatLoad_MPIAIJ" 1893 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat) 1894 { 1895 Mat A; 1896 PetscScalar *vals,*svals; 1897 MPI_Comm comm = ((PetscObject)viewer)->comm; 1898 MPI_Status status; 1899 int i,nz,ierr,j,rstart,rend,fd; 1900 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1901 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1902 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1903 #if defined(PETSC_HAVE_MUMPS) 1904 PetscTruth flag; 1905 #endif 1906 1907 PetscFunctionBegin; 1908 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1909 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1910 if (!rank) { 1911 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1912 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1913 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1914 if (header[3] < 0) { 1915 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1916 } 1917 } 1918 1919 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1920 M = header[1]; N = header[2]; 1921 /* determine ownership of all rows */ 1922 m = M/size + ((M % size) > rank); 1923 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1924 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1925 rowners[0] = 0; 1926 for (i=2; i<=size; i++) { 1927 rowners[i] += rowners[i-1]; 1928 } 1929 rstart = rowners[rank]; 1930 rend = 
rowners[rank+1]; 1931 1932 /* distribute row lengths to all processors */ 1933 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1934 offlens = ourlens + (rend-rstart); 1935 if (!rank) { 1936 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1937 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1938 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1939 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1940 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1941 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1942 } else { 1943 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1944 } 1945 1946 if (!rank) { 1947 /* calculate the number of nonzeros on each processor */ 1948 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1949 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1950 for (i=0; i<size; i++) { 1951 for (j=rowners[i]; j< rowners[i+1]; j++) { 1952 procsnz[i] += rowlengths[j]; 1953 } 1954 } 1955 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1956 1957 /* determine max buffer needed and allocate it */ 1958 maxnz = 0; 1959 for (i=0; i<size; i++) { 1960 maxnz = PetscMax(maxnz,procsnz[i]); 1961 } 1962 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1963 1964 /* read in my part of the matrix column indices */ 1965 nz = procsnz[0]; 1966 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1967 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1968 1969 /* read in every one elses and ship off */ 1970 for (i=1; i<size; i++) { 1971 nz = procsnz[i]; 1972 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1973 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1974 } 1975 ierr = PetscFree(cols);CHKERRQ(ierr); 1976 } else { 1977 /* determine buffer space needed for message */ 1978 nz = 0; 1979 for (i=0; i<m; i++) { 1980 nz += ourlens[i]; 1981 } 1982 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 1983 1984 /* receive message of column indices*/ 1985 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1986 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1987 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1988 } 1989 1990 /* determine column ownership if matrix is not square */ 1991 if (N != M) { 1992 n = N/size + ((N % size) > rank); 1993 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1994 cstart = cend - n; 1995 } else { 1996 cstart = rstart; 1997 cend = rend; 1998 n = cend - cstart; 1999 } 2000 2001 /* loop over local rows, determining number of off diagonal entries */ 2002 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 2003 jj = 0; 2004 for (i=0; i<m; i++) { 2005 for (j=0; j<ourlens[i]; j++) { 2006 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2007 jj++; 2008 } 2009 } 2010 2011 /* create our matrix */ 2012 for (i=0; i<m; i++) { 2013 ourlens[i] -= offlens[i]; 2014 } 2015 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2016 ierr = MatSetType(A,type);CHKERRQ(ierr); 2017 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2018 2019 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2020 for (i=0; i<m; i++) { 2021 ourlens[i] += offlens[i]; 2022 } 2023 2024 if (!rank) { 2025 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2026 2027 /* read in my part of the matrix numerical values */ 2028 nz = procsnz[0]; 
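    /* Note (descriptive comment, not in the original source): the numerical values are stored
       in the file row by row, in the same order as the column indices read above; the first
       procsnz[0] scalars therefore belong to this process, and each following block of
       procsnz[i] scalars is shipped to process i below. */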
2029 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2030 2031 /* insert into matrix */ 2032 jj = rstart; 2033 smycols = mycols; 2034 svals = vals; 2035 for (i=0; i<m; i++) { 2036 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2037 smycols += ourlens[i]; 2038 svals += ourlens[i]; 2039 jj++; 2040 } 2041 2042 /* read in other processors and ship out */ 2043 for (i=1; i<size; i++) { 2044 nz = procsnz[i]; 2045 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2046 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2047 } 2048 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2049 } else { 2050 /* receive numeric values */ 2051 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2052 2053 /* receive message of values*/ 2054 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2055 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2056 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2057 2058 /* insert into matrix */ 2059 jj = rstart; 2060 smycols = mycols; 2061 svals = vals; 2062 for (i=0; i<m; i++) { 2063 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2064 smycols += ourlens[i]; 2065 svals += ourlens[i]; 2066 jj++; 2067 } 2068 } 2069 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2070 ierr = PetscFree(vals);CHKERRQ(ierr); 2071 ierr = PetscFree(mycols);CHKERRQ(ierr); 2072 ierr = PetscFree(rowners);CHKERRQ(ierr); 2073 2074 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2075 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2076 #if defined(PETSC_HAVE_MUMPS) 2077 ierr = PetscOptionsHasName(A->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr); 2078 if (flag) { ierr = MatUseMUMPS_MPIAIJ(A);CHKERRQ(ierr); } 2079 #endif 2080 *newmat = A; 2081 PetscFunctionReturn(0); 2082 } 2083 EXTERN_C_END 2084 2085 #undef __FUNCT__ 2086 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2087 /* 2088 Not great since it makes two copies of the submatrix, first an SeqAIJ 2089 in local and then by concatenating the local matrices the end result. 
2090 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2091 */ 2092 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2093 { 2094 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2095 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2096 Mat *local,M,Mreuse; 2097 PetscScalar *vwork,*aa; 2098 MPI_Comm comm = mat->comm; 2099 Mat_SeqAIJ *aij; 2100 2101 2102 PetscFunctionBegin; 2103 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2104 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2105 2106 if (call == MAT_REUSE_MATRIX) { 2107 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2108 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 2109 local = &Mreuse; 2110 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2111 } else { 2112 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2113 Mreuse = *local; 2114 ierr = PetscFree(local);CHKERRQ(ierr); 2115 } 2116 2117 /* 2118 m - number of local rows 2119 n - number of columns (same on all processors) 2120 rstart - first row in new global matrix generated 2121 */ 2122 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2123 if (call == MAT_INITIAL_MATRIX) { 2124 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2125 ii = aij->i; 2126 jj = aij->j; 2127 2128 /* 2129 Determine the number of non-zeros in the diagonal and off-diagonal 2130 portions of the matrix in order to do correct preallocation 2131 */ 2132 2133 /* first get start and end of "diagonal" columns */ 2134 if (csize == PETSC_DECIDE) { 2135 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2136 if (mglobal == n) { /* square matrix */ 2137 nlocal = m; 2138 } else { 2139 nlocal = n/size + ((n % size) > rank); 2140 } 2141 } else { 2142 nlocal = csize; 2143 } 2144 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2145 rstart = rend - nlocal; 2146 if (rank == size - 1 && rend != n) { 2147 SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n); 2148 } 2149 2150 /* next, compute all the lengths */ 2151 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2152 olens = dlens + m; 2153 for (i=0; i<m; i++) { 2154 jend = ii[i+1] - ii[i]; 2155 olen = 0; 2156 dlen = 0; 2157 for (j=0; j<jend; j++) { 2158 if (*jj < rstart || *jj >= rend) olen++; 2159 else dlen++; 2160 jj++; 2161 } 2162 olens[i] = olen; 2163 dlens[i] = dlen; 2164 } 2165 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2166 ierr = PetscFree(dlens);CHKERRQ(ierr); 2167 } else { 2168 int ml,nl; 2169 2170 M = *newmat; 2171 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2172 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2173 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2174 /* 2175 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2176 rather than the slower MatSetValues(). 
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2. That is, we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e., 34, and
   hence the preallocation is perfect.
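
   A typical call sequence (an illustrative sketch only; setting up the d_nnz/o_nnz
   arrays and the error checking are application specific and omitted here) is:
.vb
     MatCreate(comm,m,n,M,N,&A);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
     ... set entries with MatSetValues() ...
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve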

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
{
  int ierr,(*f)(Mat,int,const int[],int,const int[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).
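
   For example, either of the following calls is legal (an illustrative sketch; the
   preallocation arguments are discussed below):
.vb
     MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);
     MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);
.ve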

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2. That is, we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  int        ierr,i;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_LOCAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    int             *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      larray[i] = i + a->cstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@C
   MatMerge - Creates a single large PETSc matrix by concatenating sequential
              matrices from each processor

   Collective on MPI_Comm

   Input Parameters:
+  comm - the communicator the parallel matrix will live on
-  inmat - the input sequential matrix (one per process)

   Output Parameter:
.  outmat - the parallel matrix generated

   Level: advanced

   Notes: The number of columns in EACH of the separate input matrices MUST be the same.
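
   As the implementation below shows, the input matrix inmat is destroyed by this routine
   once its entries have been copied into the parallel matrix.

   A minimal usage sketch (illustrative only; Aloc is assumed to be a sequential matrix
   that has already been assembled by the caller):
.vb
     Mat Aloc,Apar;
     ... build and assemble the sequential matrix Aloc on each process ...
     MatMerge(PETSC_COMM_WORLD,Aloc,&Apar);
.ve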
2652 2653 @*/ 2654 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat) 2655 { 2656 int ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz; 2657 PetscScalar *values; 2658 PetscMap columnmap,rowmap; 2659 2660 PetscFunctionBegin; 2661 2662 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr); 2663 2664 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2665 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2666 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr); 2667 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2668 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2669 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2670 2671 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2672 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2673 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2674 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2675 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2676 2677 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2678 for (i=0;i<m;i++) { 2679 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2680 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2681 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2682 } 2683 ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr); 2684 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2685 2686 for (i=0;i<m;i++) { 2687 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2688 I = i + rstart; 2689 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2690 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2691 } 2692 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2693 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2694 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2695 2696 PetscFunctionReturn(0); 2697 } 2698 2699 #undef __FUNCT__ 2700 #define __FUNCT__ "MatFileSplit" 2701 int MatFileSplit(Mat A,char *outfile) 2702 { 2703 int ierr,rank,len,m,N,i,rstart,*indx,nnz; 2704 PetscViewer out; 2705 char *name; 2706 Mat B; 2707 PetscScalar *values; 2708 2709 PetscFunctionBegin; 2710 2711 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2712 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2713 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr); 2714 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2715 for (i=0;i<m;i++) { 2716 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2717 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2718 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2719 } 2720 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2721 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2722 2723 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2724 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2725 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2726 sprintf(name,"%s.%d",outfile,rank); 2727 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr); 2728 ierr = PetscFree(name); 2729 ierr = MatView(B,out);CHKERRQ(ierr); 2730 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2731 ierr = MatDestroy(B);CHKERRQ(ierr); 2732 PetscFunctionReturn(0); 2733 } 2734
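
/*
   Illustrative note (not part of the original source): MatMPIAIJGetSeqAIJ(), defined above,
   gives direct access to the two sequential matrices that make up a MATMPIAIJ matrix.
   A minimal usage sketch, assuming A is an assembled MATMPIAIJ matrix:

     Mat Ad,Ao;
     int *garray;
     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);

   Ad is the local DIAGONAL block and Ao the local OFF-DIAGONAL block (stored with
   compressed local column numbering); garray[j] gives the global column number of
   local column j of Ao.
*/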