/*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable,
  at a slightly higher hash-table cost; without it, it is not scalable
  (each processor has an order-N integer array) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
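/*
   Hedged usage sketch (not compiled): how the 1-based colmap convention
   built above is consumed; "aij", "ierr" and the global column "gcol" are
   assumed context, the numeric value is hypothetical.
*/
#if 0
  int gcol = 25,lcol;                    /* hypothetical global column */
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
  lcol--;                                /* stored as local index + 1; 0 means absent */
#else
  lcol = aij->colmap[gcol] - 1;          /* stored as local index + 1; 0 means absent */
#endif
  if (lcol < 0) {
    /* gcol does not occur in this process's off-diagonal part */
  }
#endif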
#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}
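/*
   A minimal standalone sketch (assumptions: room already exists in the row,
   so the CHUNKSIZE reallocation path of the macros above is omitted) of the
   sorted-insert idea both macros implement; names here are illustrative.
*/
#if 0
static void InsertSorted(int *rp,PetscScalar *ap,int *nrow,int col,PetscScalar value,InsertMode addv)
{
  int i,j;
  for (i=0; i<*nrow; i++) {
    if (rp[i] == col) {                                    /* existing nonzero: combine */
      if (addv == ADD_VALUES) ap[i] += value; else ap[i] = value;
      return;
    }
    if (rp[i] > col) break;                                /* first larger column: insert here */
  }
  for (j=*nrow; j>i; j--) { rp[j] = rp[j-1]; ap[j] = ap[j-1]; } /* shift tail up */
  rp[i] = col; ap[i] = value; (*nrow)++;
}
#endif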
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar value;
  int         ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int         cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth  roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  int         *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar *aa = a->a;
  PetscTruth  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat         B = aij->B;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  int         *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar *ba = b->a;

  int         *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  int         nonew = a->nonew,shift = 0;
  PetscScalar *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
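/*
   Hedged usage sketch (hypothetical sizes and indices): entries for rows this
   process owns go straight into aij->A/aij->B above; entries for rows owned
   elsewhere are stashed and communicated at assembly time.
*/
#if 0
  Mat         M;
  int         row = 7,cols[2] = {7,42},ierr;
  PetscScalar vals[2] = {4.0,-1.0};
  ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,5,PETSC_NULL,2,PETSC_NULL,&M);CHKERRQ(ierr);
  ierr = MatSetValues(M,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); /* row 7 may be remote */
  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDestroy(M);CHKERRQ(ierr);
#endif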
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int        cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,nstash,reallocs;
  InsertMode addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)aij->A->data,*b = (Mat_SeqAIJ*)aij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}
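/*
   Hedged note: because the stash sends started in MatAssemblyBegin() above
   are not completed until MatAssemblyEnd(), a caller can in principle overlap
   unrelated local work between the two calls (M is a hypothetical matrix):
*/
#if 0
  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* ... independent local computation here ... */
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#endif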
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIAIJ  *l = (Mat_MPIAIJ*)A->data;
  int         i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int         *nprocs,j,idx,nsends,row;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values,rstart = l->rstart;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;
  PetscTruth  found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
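/*
   Hedged usage sketch (hypothetical rows): zero two rows of a parallel matrix
   M and place 1.0 on their diagonals, as one would for Dirichlet boundary
   conditions; rows listed here may be owned by other processes.
*/
#if 0
  IS          is;
  int         rows[2] = {0,5},ierr;
  PetscScalar one = 1.0;
  ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is);CHKERRQ(ierr);
  ierr = MatZeroRows(M,is,&one);CHKERRQ(ierr);
  ierr = ISDestroy(is);CHKERRQ(ierr);
#endif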
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in yy until the next line, which is true for this
     implementation but may not hold in general. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
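/*
   Summary of the splitting used by the multiply routines above: with x_local
   the owned part of x and x_ghost the off-process entries gathered into
   a->lvec by the scatter a->Mvctx,

       y_local = A_diag * x_local + B_off * x_ghost

   The scatter is started before the diagonal-block multiply so communication
   overlaps local computation; the transpose routines run the same scatter in
   reverse with ADD_VALUES.
*/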
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
int MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f)
{
  MPI_Comm   comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)Amat->data,*Bij;
  Mat        Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS         Me,Notme;
  int        M,N,first,last,*notme,ntids,i,ierr;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&ntids);CHKERRQ(ierr);
  if (ntids==1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(int),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which is true for this
     implementation but may not hold in general. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ  *B = (Mat_SeqAIJ*)aij->B->data;
  int         nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
  int         nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (rank == 0) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs as much space as the processor with the most nonzeros */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
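/*
   For reference, the binary layout produced above (PETSC_INT counts followed
   by PETSC_SCALAR values, all written to the file by process 0):

     header[4]        MAT_FILE_COOKIE, M, N, global nonzero count
     rowlengths[M]    nonzeros in each global row
     colidx[nz]       global column indices, row by row, ascending within a row
     vals[nz]         numerical values in the same row-by-row order
*/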
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  int               ierr,rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,isascii,flg,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (isascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  int         ierr;
  Vec         bb1;
  PetscScalar mone = -1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
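/*
   For reference, each sweep of the local relaxation above solves with the
   diagonal block against a right-hand side corrected for the off-process
   coupling,

       bb1 = bb - B_off * x_ghost

   which is exactly what the VecScale(-1)/multadd pair computes before every
   (*mat->A->ops->relax)() call.
*/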
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Mat        A = mat->A,B = mat->B;
  int        ierr;
  PetscReal  isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}
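/*
   Hedged usage sketch: query the global storage statistics accumulated above
   for a parallel matrix M (hypothetical variable).
*/
#if 0
  MatInfo info;
  int     ierr;
  ierr = MatGetInfo(M,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %d, nz allocated %d\n",(int)info.nz_used,(int)info.nz_allocated);CHKERRQ(ierr);
#endif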
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_NOT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int         i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int         nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int         *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int        max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
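/*
   Hedged usage sketch: MatGetRow() above only serves locally owned rows and
   must be matched by MatRestoreRow() before the next call (otherwise the
   "Already active" error fires); M is a hypothetical assembled matrix.
*/
#if 0
  int         ierr,nz,*cols,rstart,rend,row;
  PetscScalar *vals;
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(M,row,&nz,&cols,&vals);CHKERRQ(ierr);
    /* ... inspect nz, cols[], vals[] ... */
    ierr = MatRestoreRow(M,row,&nz,&cols,&vals);CHKERRQ(ierr);
  }
#endif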
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  int         ierr,i,j,cstart = aij->cstart;
  PetscReal   sum = 0.0;
  PetscScalar *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      int       *jj,*garray = aij->garray;
      ierr  = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr  = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
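/*
   For reference, the norms computed above are:

     NORM_FROBENIUS   sqrt(sum_ij |a_ij|^2)   local sums combined with MPI_SUM
     NORM_1           max_j sum_i |a_ij|      column sums summed across ranks, then maxed
     NORM_INFINITY    max_i sum_j |a_ij|      local row sums combined with MPI_MAX
*/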
If need be, we can provide a more 1508 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1509 then copying the submatrices */ 1510 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1511 } else { 1512 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1513 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1514 } 1515 PetscFunctionReturn(0); 1516 } 1517 1518 #undef __FUNCT__ 1519 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1520 int MatSetUpPreallocation_MPIAIJ(Mat A) 1521 { 1522 int ierr; 1523 1524 PetscFunctionBegin; 1525 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1526 PetscFunctionReturn(0); 1527 } 1528 1529 #include "petscblaslapack.h" 1530 #undef __FUNCT__ 1531 #define __FUNCT__ "MatAXPY_MPIAIJ" 1532 int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1533 { 1534 int ierr,one=1,i; 1535 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1536 Mat_SeqAIJ *x,*y; 1537 1538 PetscFunctionBegin; 1539 if (str == SAME_NONZERO_PATTERN) { 1540 x = (Mat_SeqAIJ *)xx->A->data; 1541 y = (Mat_SeqAIJ *)yy->A->data; 1542 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1543 x = (Mat_SeqAIJ *)xx->B->data; 1544 y = (Mat_SeqAIJ *)yy->B->data; 1545 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1546 } else if (str == SUBSET_NONZERO_PATTERN) { 1547 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1548 1549 x = (Mat_SeqAIJ *)xx->B->data; 1550 y = (Mat_SeqAIJ *)yy->B->data; 1551 if (y->xtoy && y->XtoY != xx->B) { 1552 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1553 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1554 } 1555 if (!y->xtoy) { /* get xtoy */ 1556 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1557 y->XtoY = xx->B; 1558 } 1559 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1560 } else { 1561 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1562 } 1563 PetscFunctionReturn(0); 1564 } 1565 1566 /* -------------------------------------------------------------------*/ 1567 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1568 MatGetRow_MPIAIJ, 1569 MatRestoreRow_MPIAIJ, 1570 MatMult_MPIAIJ, 1571 /* 4*/ MatMultAdd_MPIAIJ, 1572 MatMultTranspose_MPIAIJ, 1573 MatMultTransposeAdd_MPIAIJ, 1574 0, 1575 0, 1576 0, 1577 /*10*/ 0, 1578 0, 1579 0, 1580 MatRelax_MPIAIJ, 1581 MatTranspose_MPIAIJ, 1582 /*15*/ MatGetInfo_MPIAIJ, 1583 MatEqual_MPIAIJ, 1584 MatGetDiagonal_MPIAIJ, 1585 MatDiagonalScale_MPIAIJ, 1586 MatNorm_MPIAIJ, 1587 /*20*/ MatAssemblyBegin_MPIAIJ, 1588 MatAssemblyEnd_MPIAIJ, 1589 0, 1590 MatSetOption_MPIAIJ, 1591 MatZeroEntries_MPIAIJ, 1592 /*25*/ MatZeroRows_MPIAIJ, 1593 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1594 MatLUFactorSymbolic_MPIAIJ_TFS, 1595 #else 1596 0, 1597 #endif 1598 0, 1599 0, 1600 0, 1601 /*30*/ MatSetUpPreallocation_MPIAIJ, 1602 0, 1603 0, 1604 0, 1605 0, 1606 /*35*/ MatDuplicate_MPIAIJ, 1607 0, 1608 0, 1609 0, 1610 0, 1611 /*40*/ MatAXPY_MPIAIJ, 1612 MatGetSubMatrices_MPIAIJ, 1613 MatIncreaseOverlap_MPIAIJ, 1614 MatGetValues_MPIAIJ, 1615 MatCopy_MPIAIJ, 1616 /*45*/ MatPrintHelp_MPIAIJ, 1617 MatScale_MPIAIJ, 1618 0, 1619 0, 1620 0, 1621 /*50*/ MatGetBlockSize_MPIAIJ, 1622 0, 1623 0, 1624 0, 1625 0, 1626 /*55*/ MatFDColoringCreate_MPIAIJ, 1627 0, 1628 MatSetUnfactored_MPIAIJ, 1629 0, 1630 0, 1631 /*60*/ MatGetSubMatrix_MPIAIJ, 1632 MatDestroy_MPIAIJ, 1633 MatView_MPIAIJ, 1634 MatGetPetscMaps_Petsc, 1635 0, 1636 /*65*/ 0, 1637 0, 1638 0, 1639 0, 1640 0, 1641 /*70*/ 0, 1642 0, 1643 
       MatSetColoring_MPIAIJ,
       MatSetValuesAdic_MPIAIJ,
       MatSetValuesAdifor_MPIAIJ,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
       0,
/*85*/ MatLoad_MPIAIJ};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
int MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
int MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#include "petscpc.h"
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
{
  Mat_MPIAIJ *b;
  int        ierr,i;

  PetscFunctionBegin;
  B->preallocated = PETSC_TRUE;
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<B->m; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->m; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  /* the diagonal part b->A is m x n; the off-diagonal part b->B is created with the
     full global column count, its columns are compressed to the nonzero ones during assembly */
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->A);
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->B);

  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
.  -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJ()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
int MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  int        ierr,i,size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr    = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor    = 0;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* build cache for off-processor entries formed during assembly */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                           "MatIsTranspose_MPIAIJ",
                                           MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJ",
                                           MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIAIJ",
                                           MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.

   Options Database Keys:
.  -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJ(),MATSEQAIJ,MATMPIAIJ
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_AIJ"
int MatCreate_AIJ(Mat A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  int        ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    int len;
    len  = oldmat->B->n;
    ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,len*sizeof(int));
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->B);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
"petscsys.h" 1921 1922 #undef __FUNCT__ 1923 #define __FUNCT__ "MatLoad_MPIAIJ" 1924 int MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1925 { 1926 Mat A; 1927 PetscScalar *vals,*svals; 1928 MPI_Comm comm = ((PetscObject)viewer)->comm; 1929 MPI_Status status; 1930 int i,nz,ierr,j,rstart,rend,fd; 1931 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1932 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1933 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1934 1935 PetscFunctionBegin; 1936 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1937 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1938 if (!rank) { 1939 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1940 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1941 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1942 if (header[3] < 0) { 1943 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1944 } 1945 } 1946 1947 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1948 M = header[1]; N = header[2]; 1949 /* determine ownership of all rows */ 1950 m = M/size + ((M % size) > rank); 1951 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1952 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1953 rowners[0] = 0; 1954 for (i=2; i<=size; i++) { 1955 rowners[i] += rowners[i-1]; 1956 } 1957 rstart = rowners[rank]; 1958 rend = rowners[rank+1]; 1959 1960 /* distribute row lengths to all processors */ 1961 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1962 offlens = ourlens + (rend-rstart); 1963 if (!rank) { 1964 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1965 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1966 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1967 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1968 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1969 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1970 } else { 1971 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1972 } 1973 1974 if (!rank) { 1975 /* calculate the number of nonzeros on each processor */ 1976 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1977 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1978 for (i=0; i<size; i++) { 1979 for (j=rowners[i]; j< rowners[i+1]; j++) { 1980 procsnz[i] += rowlengths[j]; 1981 } 1982 } 1983 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1984 1985 /* determine max buffer needed and allocate it */ 1986 maxnz = 0; 1987 for (i=0; i<size; i++) { 1988 maxnz = PetscMax(maxnz,procsnz[i]); 1989 } 1990 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1991 1992 /* read in my part of the matrix column indices */ 1993 nz = procsnz[0]; 1994 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1995 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1996 1997 /* read in every one elses and ship off */ 1998 for (i=1; i<size; i++) { 1999 nz = procsnz[i]; 2000 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2001 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2002 } 2003 ierr = PetscFree(cols);CHKERRQ(ierr); 2004 } else { 2005 /* determine buffer space needed for message */ 2006 nz = 0; 2007 for (i=0; i<m; i++) { 2008 nz += ourlens[i]; 2009 } 2010 ierr = 
    ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);

  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the values for the other processors and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ
  submatrix on each process, and then the final result obtained by
  concatenating those local pieces.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int         ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  int         *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat         *local,M,Mreuse;
  PetscScalar *vwork,*aa;
  MPI_Comm    comm = mat->comm;
  Mat_SeqAIJ  *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
      portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
    }

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as the request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
      rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;  jj += nz;
    vwork = aa;  aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates storage for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure.  The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor.  This is a square mxm matrix.  The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors.  Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows.  This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively.  The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc.  This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e., 34, and
   hence the preallocation is perfect.
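
   For instance, a minimal call sequence for proc0 in the example above might look
   as follows (a sketch only; error checking is omitted, and the d_nnz/o_nnz values
   are simply the proc0 per-row counts listed above):
.vb
     Mat A;
     int d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};  /* proc0 row counts from above */
     MatCreate(comm,3,3,8,8,&A);                 /* m=3,n=3,M=8,N=8 on proc0 */
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
   The other processors make the same calls with their own local sizes and counts.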

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
{
  int ierr,(*f)(Mat,int,const int[],int,const int[]);

  PetscFunctionBegin;
  /* the preallocation routine is attached only by matrix types that support it;
     for other types this is a no-op */
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given).  For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure.  The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).
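
   For example, a common way to create a square matrix, letting PETSc choose the
   row and column layout, is sketched below (the per-row estimates 5 and 2 are
   illustrative only; choose them to match your problem):
.vb
     Mat A;
     MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);
.ve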

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor.  This is a square mxm matrix.  The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors.  Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows.  This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively.  The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc.  This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
/*
    Returns the diagonal part, the off-diagonal part, and the global column
  indices of the off-diagonal columns (garray) of an MPIAIJ matrix.
*/
int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  int        ierr,i;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_LOCAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    int             *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      larray[i] = i + a->cstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@C
   MatMerge - Creates a single large PETSc matrix by concatenating the sequential
   matrices from each processor

   Collective on MPI_Comm

   Input Parameters:
+  comm - the communicator the parallel matrix will live on
-  inmat - the input sequential matrix on each processor

   Output Parameter:
.  outmat - the parallel matrix generated

   Level: advanced

   Notes: The number of columns of the matrix on EACH of the separate processors
   MUST be the same.
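
   Example usage (a sketch; it assumes each processor has already assembled its
   own sequential matrix inmat, all with the same number of columns):
.vb
     Mat inmat;      /* sequential matrix on each processor */
     Mat outmat;     /* the resulting parallel matrix */
     MatMerge(comm,inmat,&outmat);
.ve
   Note that inmat is destroyed by this routine, so it must not be used
   (or destroyed) afterwards.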

@*/
int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
{
  int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
  PetscScalar *values;
  PetscMap    columnmap,rowmap;

  PetscFunctionBegin;

  ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);

  /* count nonzeros in each row, for the diagonal and off-diagonal portions of the matrix */
  ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
  ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
  ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
  ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
  ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);

  ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
  ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
  ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
  ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
  ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  for (i=0;i<m;i++) {
    ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
    ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  for (i=0;i<m;i++) {
    ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    I    = i + rstart;
    ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatDestroy(inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
/*
    Writes each processor's local rows of A, as a sequential matrix, to the
  binary file <outfile>.<rank>.
*/
int MatFileSplit(Mat A,char *outfile)
{
  int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
  PetscViewer out;
  char        *name;
  Mat         B;
  PetscScalar *values;

  PetscFunctionBegin;

  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
  for (i=0;i<m;i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr); /* room for the ".<rank>" suffix */
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}