1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/ 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/inline/spops.h" 5 6 /* 7 Local utility routine that creates a mapping from the global column 8 number to the local number in the off-diagonal part of the local 9 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 10 a slightly higher hash table cost; without it it is not scalable (each processor 11 has an order N integer array but is fast to acess. 12 */ 13 #undef __FUNCT__ 14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 15 int CreateColmap_MPIAIJ_Private(Mat mat) 16 { 17 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 18 int n = aij->B->n,i,ierr; 19 20 PetscFunctionBegin; 21 #if defined (PETSC_USE_CTABLE) 22 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 23 for (i=0; i<n; i++){ 24 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 25 } 26 #else 27 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 28 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 29 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 30 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 31 #endif 32 PetscFunctionReturn(0); 33 } 34 35 #define CHUNKSIZE 15 36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 37 { \ 38 \ 39 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 40 rmax = aimax[row]; nrow = ailen[row]; \ 41 col1 = col - shift; \ 42 \ 43 low = 0; high = nrow; \ 44 while (high-low > 5) { \ 45 t = (low+high)/2; \ 46 if (rp[t] > col) high = t; \ 47 else low = t; \ 48 } \ 49 for (_i=low; _i<high; _i++) { \ 50 if (rp[_i] > col1) break; \ 51 if (rp[_i] == col1) { \ 52 if (addv == ADD_VALUES) ap[_i] += value; \ 53 else ap[_i] = value; \ 54 goto a_noinsert; \ 55 } \ 56 } \ 57 if (nonew == 1) goto a_noinsert; \ 58 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 59 if (nrow >= rmax) { \ 60 /* there is no extra room in row, therefore enlarge */ \ 61 int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 62 PetscScalar *new_a; \ 63 \ 64 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 65 \ 66 /* malloc new storage space */ \ 67 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \ 68 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 69 new_j = (int*)(new_a + new_nz); \ 70 new_i = new_j + new_nz; \ 71 \ 72 /* copy over old data into new slots */ \ 73 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 74 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 75 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 76 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 77 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 78 len*sizeof(int));CHKERRQ(ierr); \ 79 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 80 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 81 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 82 /* free up old matrix storage */ \ 83 \ 84 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 85 if (!a->singlemalloc) { \ 86 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 87 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 88 } \ 89 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 90 a->singlemalloc = PETSC_TRUE; \ 91 \ 92 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 93 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 94 
PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 95 a->maxnz += CHUNKSIZE; \ 96 a->reallocs++; \ 97 } \ 98 N = nrow++ - 1; a->nz++; \ 99 /* shift up all the later entries in this row */ \ 100 for (ii=N; ii>=_i; ii--) { \ 101 rp[ii+1] = rp[ii]; \ 102 ap[ii+1] = ap[ii]; \ 103 } \ 104 rp[_i] = col1; \ 105 ap[_i] = value; \ 106 a_noinsert: ; \ 107 ailen[row] = nrow; \ 108 } 109 110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 111 { \ 112 \ 113 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 114 rmax = bimax[row]; nrow = bilen[row]; \ 115 col1 = col - shift; \ 116 \ 117 low = 0; high = nrow; \ 118 while (high-low > 5) { \ 119 t = (low+high)/2; \ 120 if (rp[t] > col) high = t; \ 121 else low = t; \ 122 } \ 123 for (_i=low; _i<high; _i++) { \ 124 if (rp[_i] > col1) break; \ 125 if (rp[_i] == col1) { \ 126 if (addv == ADD_VALUES) ap[_i] += value; \ 127 else ap[_i] = value; \ 128 goto b_noinsert; \ 129 } \ 130 } \ 131 if (nonew == 1) goto b_noinsert; \ 132 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 133 if (nrow >= rmax) { \ 134 /* there is no extra room in row, therefore enlarge */ \ 135 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 136 PetscScalar *new_a; \ 137 \ 138 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 139 \ 140 /* malloc new storage space */ \ 141 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 142 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 143 new_j = (int*)(new_a + new_nz); \ 144 new_i = new_j + new_nz; \ 145 \ 146 /* copy over old data into new slots */ \ 147 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 148 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 149 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 150 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 151 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 152 len*sizeof(int));CHKERRQ(ierr); \ 153 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 154 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 155 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 156 /* free up old matrix storage */ \ 157 \ 158 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 159 if (!b->singlemalloc) { \ 160 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 161 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 162 } \ 163 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 164 b->singlemalloc = PETSC_TRUE; \ 165 \ 166 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 167 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 168 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 169 b->maxnz += CHUNKSIZE; \ 170 b->reallocs++; \ 171 } \ 172 N = nrow++ - 1; b->nz++; \ 173 /* shift up all the later entries in this row */ \ 174 for (ii=N; ii>=_i; ii--) { \ 175 rp[ii+1] = rp[ii]; \ 176 ap[ii+1] = ap[ii]; \ 177 } \ 178 rp[_i] = col1; \ 179 ap[_i] = value; \ 180 b_noinsert: ; \ 181 bilen[row] = nrow; \ 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatSetValues_MPIAIJ" 186 int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 187 { 188 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 189 PetscScalar value; 190 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 191 int cstart = aij->cstart,cend = aij->cend,row,col; 192 PetscTruth roworiented = 
aij->roworiented; 193 194 /* Some Variables required in the macro */ 195 Mat A = aij->A; 196 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 197 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 198 PetscScalar *aa = a->a; 199 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 200 Mat B = aij->B; 201 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 202 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 203 PetscScalar *ba = b->a; 204 205 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 206 int nonew = a->nonew,shift=0; 207 PetscScalar *ap; 208 209 PetscFunctionBegin; 210 for (i=0; i<m; i++) { 211 if (im[i] < 0) continue; 212 #if defined(PETSC_USE_BOPT_g) 213 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 214 #endif 215 if (im[i] >= rstart && im[i] < rend) { 216 row = im[i] - rstart; 217 for (j=0; j<n; j++) { 218 if (in[j] >= cstart && in[j] < cend){ 219 col = in[j] - cstart; 220 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 221 if (ignorezeroentries && value == 0.0) continue; 222 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 223 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 224 } else if (in[j] < 0) continue; 225 #if defined(PETSC_USE_BOPT_g) 226 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 227 #endif 228 else { 229 if (mat->was_assembled) { 230 if (!aij->colmap) { 231 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 232 } 233 #if defined (PETSC_USE_CTABLE) 234 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 235 col--; 236 #else 237 col = aij->colmap[in[j]] - 1; 238 #endif 239 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 240 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 241 col = in[j]; 242 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 243 B = aij->B; 244 b = (Mat_SeqAIJ*)B->data; 245 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 246 ba = b->a; 247 } 248 } else col = in[j]; 249 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 250 if (ignorezeroentries && value == 0.0) continue; 251 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 252 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 253 } 254 } 255 } else { 256 if (!aij->donotstash) { 257 if (roworiented) { 258 if (ignorezeroentries && v[i*n] == 0.0) continue; 259 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 260 } else { 261 if (ignorezeroentries && v[i] == 0.0) continue; 262 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 263 } 264 } 265 } 266 } 267 PetscFunctionReturn(0); 268 } 269 270 #undef __FUNCT__ 271 #define __FUNCT__ "MatGetValues_MPIAIJ" 272 int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 273 { 274 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 275 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 276 int cstart = aij->cstart,cend = aij->cend,row,col; 277 278 PetscFunctionBegin; 279 for (i=0; i<m; i++) { 280 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 281 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 282 if (idxm[i] >= rstart && idxm[i] < rend) { 283 row = idxm[i] - rstart; 284 for (j=0; j<n; j++) { 285 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative 
column: %d",idxn[j]); 286 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 287 if (idxn[j] >= cstart && idxn[j] < cend){ 288 col = idxn[j] - cstart; 289 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 290 } else { 291 if (!aij->colmap) { 292 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 293 } 294 #if defined (PETSC_USE_CTABLE) 295 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 296 col --; 297 #else 298 col = aij->colmap[idxn[j]] - 1; 299 #endif 300 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 301 else { 302 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 303 } 304 } 305 } 306 } else { 307 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 308 } 309 } 310 PetscFunctionReturn(0); 311 } 312 313 #undef __FUNCT__ 314 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 315 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 316 { 317 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 318 int ierr,nstash,reallocs; 319 InsertMode addv; 320 321 PetscFunctionBegin; 322 if (aij->donotstash) { 323 PetscFunctionReturn(0); 324 } 325 326 /* make sure all processors are either in INSERTMODE or ADDMODE */ 327 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 328 if (addv == (ADD_VALUES|INSERT_VALUES)) { 329 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 330 } 331 mat->insertmode = addv; /* in case this processor had no cache */ 332 333 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 334 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 335 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 336 PetscFunctionReturn(0); 337 } 338 339 340 #undef __FUNCT__ 341 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 342 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 343 { 344 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 345 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 346 int i,j,rstart,ncols,n,ierr,flg; 347 int *row,*col,other_disassembled; 348 PetscScalar *val; 349 InsertMode addv = mat->insertmode; 350 351 PetscFunctionBegin; 352 if (!aij->donotstash) { 353 while (1) { 354 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 355 if (!flg) break; 356 357 for (i=0; i<n;) { 358 /* Now identify the consecutive vals belonging to the same row */ 359 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 360 if (j < n) ncols = j-i; 361 else ncols = n-i; 362 /* Now assemble all these values with a single function call */ 363 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 364 i = j; 365 } 366 } 367 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 368 } 369 370 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 371 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 372 373 /* determine if any processor has disassembled, if so we must 374 also disassemble ourselfs, in order that we may reassemble. 
*/ 375 /* 376 if nonzero structure of submatrix B cannot change then we know that 377 no processor disassembled thus we can skip this stuff 378 */ 379 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 380 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 381 if (mat->was_assembled && !other_disassembled) { 382 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 383 } 384 } 385 386 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 387 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 388 } 389 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 390 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 391 392 if (aij->rowvalues) { 393 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 394 aij->rowvalues = 0; 395 } 396 397 /* used by MatAXPY() */ 398 a->xtoy = 0; b->xtoy = 0; 399 a->XtoY = 0; b->XtoY = 0; 400 401 PetscFunctionReturn(0); 402 } 403 404 #undef __FUNCT__ 405 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 406 int MatZeroEntries_MPIAIJ(Mat A) 407 { 408 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 409 int ierr; 410 411 PetscFunctionBegin; 412 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 413 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 414 PetscFunctionReturn(0); 415 } 416 417 #undef __FUNCT__ 418 #define __FUNCT__ "MatZeroRows_MPIAIJ" 419 int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 420 { 421 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 422 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 423 int *nprocs,j,idx,nsends,row; 424 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 425 int *rvalues,tag = A->tag,count,base,slen,n,*source; 426 int *lens,imdex,*lrows,*values,rstart=l->rstart; 427 MPI_Comm comm = A->comm; 428 MPI_Request *send_waits,*recv_waits; 429 MPI_Status recv_status,*send_status; 430 IS istmp; 431 PetscTruth found; 432 433 PetscFunctionBegin; 434 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 435 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 436 437 /* first count number of contributors to each processor */ 438 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 439 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 440 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 441 for (i=0; i<N; i++) { 442 idx = rows[i]; 443 found = PETSC_FALSE; 444 for (j=0; j<size; j++) { 445 if (idx >= owners[j] && idx < owners[j+1]) { 446 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 447 } 448 } 449 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 450 } 451 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 452 453 /* inform other processors of number of messages and max length*/ 454 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 455 456 /* post receives: */ 457 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 458 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 459 for (i=0; i<nrecvs; i++) { 460 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 461 } 462 463 /* do sends: 464 1) starts[i] gives the starting index in svalues for stuff going to 465 the ith processor 466 */ 467 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 468 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 469 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 470 starts[0] = 0; 471 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 472 for (i=0; i<N; i++) { 473 svalues[starts[owner[i]]++] 
= rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory and cause many mallocs, slowing things down.
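
     A usage sketch of the public interface implemented here (illustrative only,
     not part of the original source; nlocal and myrows[] stand for hypothetical
     user-supplied data, and A is assumed to be an assembled MATMPIAIJ matrix):

        IS          is;
        PetscScalar one = 1.0;
        ierr = ISCreateGeneral(PETSC_COMM_WORLD,nlocal,myrows,&is);CHKERRQ(ierr);
        ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);      zero the listed rows, put 1.0 on their diagonals
        ierr = ISDestroy(is);CHKERRQ(ierr);

     Passing 0 for the final argument zeroes the rows without touching the
     diagonal, exactly as is done for l->B below.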
527 528 Contributed by: Mathew Knepley 529 */ 530 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 531 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 532 if (diag && (l->A->M == l->A->N)) { 533 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 534 } else if (diag) { 535 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 536 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 537 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 538 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 539 } 540 for (i = 0; i < slen; i++) { 541 row = lrows[i] + rstart; 542 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 543 } 544 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 545 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 546 } else { 547 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 548 } 549 ierr = ISDestroy(istmp);CHKERRQ(ierr); 550 ierr = PetscFree(lrows);CHKERRQ(ierr); 551 552 /* wait on sends */ 553 if (nsends) { 554 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 555 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 556 ierr = PetscFree(send_status);CHKERRQ(ierr); 557 } 558 ierr = PetscFree(send_waits);CHKERRQ(ierr); 559 ierr = PetscFree(svalues);CHKERRQ(ierr); 560 561 PetscFunctionReturn(0); 562 } 563 564 #undef __FUNCT__ 565 #define __FUNCT__ "MatMult_MPIAIJ" 566 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 567 { 568 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 569 int ierr,nt; 570 571 PetscFunctionBegin; 572 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 573 if (nt != A->n) { 574 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 575 } 576 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 577 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 578 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 579 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 580 PetscFunctionReturn(0); 581 } 582 583 #undef __FUNCT__ 584 #define __FUNCT__ "MatMultAdd_MPIAIJ" 585 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 586 { 587 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 588 int ierr; 589 590 PetscFunctionBegin; 591 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 592 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 593 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 594 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 595 PetscFunctionReturn(0); 596 } 597 598 #undef __FUNCT__ 599 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 600 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 601 { 602 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 603 int ierr; 604 605 PetscFunctionBegin; 606 /* do nondiagonal part */ 607 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 608 /* send it on its way */ 609 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 610 /* do local part */ 611 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 612 /* receive remote parts: note this assumes the values are not actually */ 613 /* inserted in yy until the next line, which is true for my implementation*/ 614 /* but is not perhaps always true. 
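
     Illustrative sketch of the decomposition used here (not part of the original
     source). With the usual MPIAIJ splitting of the local rows into a "diagonal"
     block A_d (stored in a->A) and an off-process block A_o (stored in a->B),
     the transpose product y = A^T x is assembled as

         lvec     = A_o^T * x_local           local work on a->B
         y_local  = A_d^T * x_local           local work on a->A
         y       += reverse scatter of lvec   VecScatter with ADD_VALUES

     that is, each process computes the contributions its own columns of A_o make
     to other processes' entries of y, then the scatter ships and adds them.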
*/
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
int MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f)
{
  MPI_Comm   comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat        Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS         Me,Notme;
  int        M,N,first,last,*notme,ntids,i,ierr;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,f); CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm); CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&ntids); CHKERRQ(ierr);
  if (ntids==1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(int),&notme); CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs); CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs); CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,f); CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs); CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs); CHKERRQ(ierr);
  ierr = ISDestroy(Me); CHKERRQ(ierr);
  ierr = ISDestroy(Notme); CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for this implementation */
  /* but may not always be true.
*/ 679 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 680 PetscFunctionReturn(0); 681 } 682 683 /* 684 This only works correctly for square matrices where the subblock A->A is the 685 diagonal block 686 */ 687 #undef __FUNCT__ 688 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 689 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 690 { 691 int ierr; 692 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 693 694 PetscFunctionBegin; 695 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 696 if (a->rstart != a->cstart || a->rend != a->cend) { 697 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 698 } 699 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 700 PetscFunctionReturn(0); 701 } 702 703 #undef __FUNCT__ 704 #define __FUNCT__ "MatScale_MPIAIJ" 705 int MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 706 { 707 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 708 int ierr; 709 710 PetscFunctionBegin; 711 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 712 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 713 PetscFunctionReturn(0); 714 } 715 716 #undef __FUNCT__ 717 #define __FUNCT__ "MatDestroy_MPIAIJ" 718 int MatDestroy_MPIAIJ(Mat mat) 719 { 720 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 721 int ierr; 722 723 PetscFunctionBegin; 724 #if defined(PETSC_USE_LOG) 725 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 726 #endif 727 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 728 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 729 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 730 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 731 #if defined (PETSC_USE_CTABLE) 732 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 733 #else 734 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 735 #endif 736 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 737 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 738 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 739 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 740 ierr = PetscFree(aij);CHKERRQ(ierr); 741 PetscFunctionReturn(0); 742 } 743 744 #undef __FUNCT__ 745 #define __FUNCT__ "MatView_MPIAIJ_Binary" 746 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 747 { 748 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 749 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 750 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 751 int nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag; 752 int nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 753 PetscScalar *column_values; 754 755 PetscFunctionBegin; 756 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 757 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 758 nz = A->nz + B->nz; 759 if (rank == 0) { 760 header[0] = MAT_FILE_COOKIE; 761 header[1] = mat->M; 762 header[2] = mat->N; 763 ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 764 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 765 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr); 766 /* get largest number of rows any processor has */ 767 rlen = mat->m; 768 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 769 for (i=1; i<size; i++) { 770 rlen = PetscMax(rlen,range[i+1] - range[i]); 771 } 772 } else { 773 ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 774 rlen = mat->m; 775 } 776 777 /* load up the local row counts */ 778 ierr = 
PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr); 779 for (i=0; i<mat->m; i++) { 780 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 781 } 782 783 /* store the row lengths to the file */ 784 if (rank == 0) { 785 MPI_Status status; 786 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr); 787 for (i=1; i<size; i++) { 788 rlen = range[i+1] - range[i]; 789 ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 790 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr); 791 } 792 } else { 793 ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 794 } 795 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 796 797 /* load up the local column indices */ 798 nzmax = nz; /* )th processor needs space a largest processor needs */ 799 ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 800 ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr); 801 cnt = 0; 802 for (i=0; i<mat->m; i++) { 803 for (j=B->i[i]; j<B->i[i+1]; j++) { 804 if ( (col = garray[B->j[j]]) > cstart) break; 805 column_indices[cnt++] = col; 806 } 807 for (k=A->i[i]; k<A->i[i+1]; k++) { 808 column_indices[cnt++] = A->j[k] + cstart; 809 } 810 for (; j<B->i[i+1]; j++) { 811 column_indices[cnt++] = garray[B->j[j]]; 812 } 813 } 814 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 815 816 /* store the column indices to the file */ 817 if (rank == 0) { 818 MPI_Status status; 819 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr); 820 for (i=1; i<size; i++) { 821 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 822 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 823 ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 824 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr); 825 } 826 } else { 827 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 828 ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 829 } 830 ierr = PetscFree(column_indices);CHKERRQ(ierr); 831 832 /* load up the local column values */ 833 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 834 cnt = 0; 835 for (i=0; i<mat->m; i++) { 836 for (j=B->i[i]; j<B->i[i+1]; j++) { 837 if ( garray[B->j[j]] > cstart) break; 838 column_values[cnt++] = B->a[j]; 839 } 840 for (k=A->i[i]; k<A->i[i+1]; k++) { 841 column_values[cnt++] = A->a[k]; 842 } 843 for (; j<B->i[i+1]; j++) { 844 column_values[cnt++] = B->a[j]; 845 } 846 } 847 if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 848 849 /* store the column values to the file */ 850 if (rank == 0) { 851 MPI_Status status; 852 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr); 853 for (i=1; i<size; i++) { 854 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 855 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 856 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 857 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr); 858 } 859 } else { 860 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 861 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 862 } 863 ierr = PetscFree(column_values);CHKERRQ(ierr); 864 
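  /*
     Summary of the file layout produced above (added for clarity; a descriptive
     sketch, not part of the original source):

        int    header[4] = {MAT_FILE_COOKIE, M, N, global number of nonzeros}
        int    rowlen[M]      row lengths for all global rows, in order
        int    colidx[nz]     global column indices, row by row, each row in
                              increasing column order
        scalar vals[nz]       numerical values in the same ordering

     Only process 0 writes to the file; every other process ships its row
     lengths, column indices and values to process 0 with MPI_Send as coded above.
  */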
PetscFunctionReturn(0); 865 } 866 867 #undef __FUNCT__ 868 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 869 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 870 { 871 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 872 int ierr,rank = aij->rank,size = aij->size; 873 PetscTruth isdraw,isascii,flg,isbinary; 874 PetscViewer sviewer; 875 PetscViewerFormat format; 876 877 PetscFunctionBegin; 878 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 879 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 880 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 881 if (isascii) { 882 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 883 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 884 MatInfo info; 885 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 886 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 887 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 888 if (flg) { 889 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 890 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 891 } else { 892 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 893 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 894 } 895 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 896 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 897 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 898 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 899 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 900 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 901 PetscFunctionReturn(0); 902 } else if (format == PETSC_VIEWER_ASCII_INFO) { 903 PetscFunctionReturn(0); 904 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 905 PetscFunctionReturn(0); 906 } 907 } else if (isbinary) { 908 if (size == 1) { 909 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 910 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 911 } else { 912 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 913 } 914 PetscFunctionReturn(0); 915 } else if (isdraw) { 916 PetscDraw draw; 917 PetscTruth isnull; 918 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 919 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 920 } 921 922 if (size == 1) { 923 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 924 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 925 } else { 926 /* assemble the entire matrix onto first processor. 
*/ 927 Mat A; 928 Mat_SeqAIJ *Aloc; 929 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 930 PetscScalar *a; 931 932 if (!rank) { 933 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 934 } else { 935 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 936 } 937 PetscLogObjectParent(mat,A); 938 939 /* copy over the A part */ 940 Aloc = (Mat_SeqAIJ*)aij->A->data; 941 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 942 row = aij->rstart; 943 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 944 for (i=0; i<m; i++) { 945 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 946 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 947 } 948 aj = Aloc->j; 949 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 950 951 /* copy over the B part */ 952 Aloc = (Mat_SeqAIJ*)aij->B->data; 953 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 954 row = aij->rstart; 955 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 956 ct = cols; 957 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 958 for (i=0; i<m; i++) { 959 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 960 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 961 } 962 ierr = PetscFree(ct);CHKERRQ(ierr); 963 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 964 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 965 /* 966 Everyone has to call to draw the matrix since the graphics waits are 967 synchronized across all processors that share the PetscDraw object 968 */ 969 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 970 if (!rank) { 971 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 972 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 973 } 974 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 975 ierr = MatDestroy(A);CHKERRQ(ierr); 976 } 977 PetscFunctionReturn(0); 978 } 979 980 #undef __FUNCT__ 981 #define __FUNCT__ "MatView_MPIAIJ" 982 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 983 { 984 int ierr; 985 PetscTruth isascii,isdraw,issocket,isbinary; 986 987 PetscFunctionBegin; 988 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 989 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 990 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 991 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 992 if (isascii || isdraw || isbinary || issocket) { 993 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 994 } else { 995 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 996 } 997 PetscFunctionReturn(0); 998 } 999 1000 1001 1002 #undef __FUNCT__ 1003 #define __FUNCT__ "MatRelax_MPIAIJ" 1004 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1005 { 1006 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1007 int ierr; 1008 Vec bb1; 1009 PetscScalar mone=-1.0; 1010 1011 PetscFunctionBegin; 1012 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1013 1014 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1015 1016 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1017 if (flag & SOR_ZERO_INITIAL_GUESS) { 1018 ierr = 
(*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1019 its--; 1020 } 1021 1022 while (its--) { 1023 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1024 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1025 1026 /* update rhs: bb1 = bb - B*x */ 1027 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1028 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1029 1030 /* local sweep */ 1031 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1032 CHKERRQ(ierr); 1033 } 1034 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1035 if (flag & SOR_ZERO_INITIAL_GUESS) { 1036 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1037 its--; 1038 } 1039 while (its--) { 1040 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1041 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1042 1043 /* update rhs: bb1 = bb - B*x */ 1044 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1045 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1046 1047 /* local sweep */ 1048 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1049 CHKERRQ(ierr); 1050 } 1051 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1052 if (flag & SOR_ZERO_INITIAL_GUESS) { 1053 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1054 its--; 1055 } 1056 while (its--) { 1057 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1058 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1059 1060 /* update rhs: bb1 = bb - B*x */ 1061 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1062 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1063 1064 /* local sweep */ 1065 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1066 CHKERRQ(ierr); 1067 } 1068 } else { 1069 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1070 } 1071 1072 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1073 PetscFunctionReturn(0); 1074 } 1075 1076 #undef __FUNCT__ 1077 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1078 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1079 { 1080 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1081 Mat A = mat->A,B = mat->B; 1082 int ierr; 1083 PetscReal isend[5],irecv[5]; 1084 1085 PetscFunctionBegin; 1086 info->block_size = 1.0; 1087 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1088 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1089 isend[3] = info->memory; isend[4] = info->mallocs; 1090 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1091 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1092 isend[3] += info->memory; isend[4] += info->mallocs; 1093 if (flag == MAT_LOCAL) { 1094 info->nz_used = isend[0]; 1095 info->nz_allocated = isend[1]; 1096 info->nz_unneeded = isend[2]; 1097 info->memory = isend[3]; 1098 info->mallocs = isend[4]; 1099 } else if (flag == MAT_GLOBAL_MAX) { 1100 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1101 info->nz_used = irecv[0]; 1102 info->nz_allocated = irecv[1]; 1103 info->nz_unneeded = irecv[2]; 1104 info->memory = irecv[3]; 1105 info->mallocs = irecv[4]; 1106 } else if (flag == 
MAT_GLOBAL_SUM) { 1107 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1108 info->nz_used = irecv[0]; 1109 info->nz_allocated = irecv[1]; 1110 info->nz_unneeded = irecv[2]; 1111 info->memory = irecv[3]; 1112 info->mallocs = irecv[4]; 1113 } 1114 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1115 info->fill_ratio_needed = 0; 1116 info->factor_mallocs = 0; 1117 info->rows_global = (double)matin->M; 1118 info->columns_global = (double)matin->N; 1119 info->rows_local = (double)matin->m; 1120 info->columns_local = (double)matin->N; 1121 1122 PetscFunctionReturn(0); 1123 } 1124 1125 #undef __FUNCT__ 1126 #define __FUNCT__ "MatSetOption_MPIAIJ" 1127 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1128 { 1129 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1130 int ierr; 1131 1132 PetscFunctionBegin; 1133 switch (op) { 1134 case MAT_NO_NEW_NONZERO_LOCATIONS: 1135 case MAT_YES_NEW_NONZERO_LOCATIONS: 1136 case MAT_COLUMNS_UNSORTED: 1137 case MAT_COLUMNS_SORTED: 1138 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1139 case MAT_KEEP_ZEROED_ROWS: 1140 case MAT_NEW_NONZERO_LOCATION_ERR: 1141 case MAT_USE_INODES: 1142 case MAT_DO_NOT_USE_INODES: 1143 case MAT_IGNORE_ZERO_ENTRIES: 1144 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1145 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1146 break; 1147 case MAT_ROW_ORIENTED: 1148 a->roworiented = PETSC_TRUE; 1149 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1150 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1151 break; 1152 case MAT_ROWS_SORTED: 1153 case MAT_ROWS_UNSORTED: 1154 case MAT_YES_NEW_DIAGONALS: 1155 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1156 break; 1157 case MAT_COLUMN_ORIENTED: 1158 a->roworiented = PETSC_FALSE; 1159 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1160 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1161 break; 1162 case MAT_IGNORE_OFF_PROC_ENTRIES: 1163 a->donotstash = PETSC_TRUE; 1164 break; 1165 case MAT_NO_NEW_DIAGONALS: 1166 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1167 case MAT_SYMMETRIC: 1168 case MAT_STRUCTURALLY_SYMMETRIC: 1169 case MAT_NOT_SYMMETRIC: 1170 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1171 case MAT_HERMITIAN: 1172 case MAT_NOT_HERMITIAN: 1173 case MAT_SYMMETRY_ETERNAL: 1174 case MAT_NOT_SYMMETRY_ETERNAL: 1175 break; 1176 default: 1177 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1178 } 1179 PetscFunctionReturn(0); 1180 } 1181 1182 #undef __FUNCT__ 1183 #define __FUNCT__ "MatGetRow_MPIAIJ" 1184 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1185 { 1186 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1187 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1188 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1189 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1190 int *cmap,*idx_p; 1191 1192 PetscFunctionBegin; 1193 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1194 mat->getrowactive = PETSC_TRUE; 1195 1196 if (!mat->rowvalues && (idx || v)) { 1197 /* 1198 allocate enough space to hold information from the longest row. 
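
       A usage sketch of the interface this routine serves (illustrative only,
       not part of the original source; rstart and rend are assumed to come from
       MatGetOwnershipRange(mat,&rstart,&rend)):

          int         row,ncols,*cols;
          PetscScalar *vals;
          for (row=rstart; row<rend; row++) {
            ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
            ... use the ncols global column indices and the values of this row ...
            ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
          }

       Only locally owned rows may be requested, as the check below enforces.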
1199 */ 1200 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1201 int max = 1,tmp; 1202 for (i=0; i<matin->m; i++) { 1203 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1204 if (max < tmp) { max = tmp; } 1205 } 1206 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1207 mat->rowindices = (int*)(mat->rowvalues + max); 1208 } 1209 1210 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1211 lrow = row - rstart; 1212 1213 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1214 if (!v) {pvA = 0; pvB = 0;} 1215 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1216 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1217 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1218 nztot = nzA + nzB; 1219 1220 cmap = mat->garray; 1221 if (v || idx) { 1222 if (nztot) { 1223 /* Sort by increasing column numbers, assuming A and B already sorted */ 1224 int imark = -1; 1225 if (v) { 1226 *v = v_p = mat->rowvalues; 1227 for (i=0; i<nzB; i++) { 1228 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1229 else break; 1230 } 1231 imark = i; 1232 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1233 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1234 } 1235 if (idx) { 1236 *idx = idx_p = mat->rowindices; 1237 if (imark > -1) { 1238 for (i=0; i<imark; i++) { 1239 idx_p[i] = cmap[cworkB[i]]; 1240 } 1241 } else { 1242 for (i=0; i<nzB; i++) { 1243 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1244 else break; 1245 } 1246 imark = i; 1247 } 1248 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1249 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1250 } 1251 } else { 1252 if (idx) *idx = 0; 1253 if (v) *v = 0; 1254 } 1255 } 1256 *nz = nztot; 1257 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1258 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1264 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1265 { 1266 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1267 1268 PetscFunctionBegin; 1269 if (aij->getrowactive == PETSC_FALSE) { 1270 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1271 } 1272 aij->getrowactive = PETSC_FALSE; 1273 PetscFunctionReturn(0); 1274 } 1275 1276 #undef __FUNCT__ 1277 #define __FUNCT__ "MatNorm_MPIAIJ" 1278 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1279 { 1280 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1281 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1282 int ierr,i,j,cstart = aij->cstart; 1283 PetscReal sum = 0.0; 1284 PetscScalar *v; 1285 1286 PetscFunctionBegin; 1287 if (aij->size == 1) { 1288 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1289 } else { 1290 if (type == NORM_FROBENIUS) { 1291 v = amat->a; 1292 for (i=0; i<amat->nz; i++) { 1293 #if defined(PETSC_USE_COMPLEX) 1294 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1295 #else 1296 sum += (*v)*(*v); v++; 1297 #endif 1298 } 1299 v = bmat->a; 1300 for (i=0; i<bmat->nz; i++) { 1301 #if defined(PETSC_USE_COMPLEX) 1302 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1303 #else 1304 sum += (*v)*(*v); v++; 1305 #endif 1306 } 1307 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1308 *norm = sqrt(*norm); 1309 } else if (type == NORM_1) { /* max column norm */ 1310 PetscReal *tmp,*tmp2; 1311 int 
*jj,*garray = aij->garray; 1312 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1313 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1314 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1315 *norm = 0.0; 1316 v = amat->a; jj = amat->j; 1317 for (j=0; j<amat->nz; j++) { 1318 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1319 } 1320 v = bmat->a; jj = bmat->j; 1321 for (j=0; j<bmat->nz; j++) { 1322 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1323 } 1324 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1325 for (j=0; j<mat->N; j++) { 1326 if (tmp2[j] > *norm) *norm = tmp2[j]; 1327 } 1328 ierr = PetscFree(tmp);CHKERRQ(ierr); 1329 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1330 } else if (type == NORM_INFINITY) { /* max row norm */ 1331 PetscReal ntemp = 0.0; 1332 for (j=0; j<aij->A->m; j++) { 1333 v = amat->a + amat->i[j]; 1334 sum = 0.0; 1335 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1336 sum += PetscAbsScalar(*v); v++; 1337 } 1338 v = bmat->a + bmat->i[j]; 1339 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1340 sum += PetscAbsScalar(*v); v++; 1341 } 1342 if (sum > ntemp) ntemp = sum; 1343 } 1344 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1345 } else { 1346 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1347 } 1348 } 1349 PetscFunctionReturn(0); 1350 } 1351 1352 #undef __FUNCT__ 1353 #define __FUNCT__ "MatTranspose_MPIAIJ" 1354 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1355 { 1356 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1357 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1358 int ierr; 1359 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1360 Mat B; 1361 PetscScalar *array; 1362 1363 PetscFunctionBegin; 1364 if (!matout && M != N) { 1365 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1366 } 1367 1368 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1369 1370 /* copy over the A part */ 1371 Aloc = (Mat_SeqAIJ*)a->A->data; 1372 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1373 row = a->rstart; 1374 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1375 for (i=0; i<m; i++) { 1376 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1377 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1378 } 1379 aj = Aloc->j; 1380 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1381 1382 /* copy over the B part */ 1383 Aloc = (Mat_SeqAIJ*)a->B->data; 1384 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1385 row = a->rstart; 1386 ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr); 1387 ct = cols; 1388 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1389 for (i=0; i<m; i++) { 1390 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1391 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1392 } 1393 ierr = PetscFree(ct);CHKERRQ(ierr); 1394 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1395 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1396 if (matout) { 1397 *matout = B; 1398 } else { 1399 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1400 } 1401 PetscFunctionReturn(0); 1402 } 1403 1404 #undef __FUNCT__ 1405 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1406 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1407 { 1408 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1409 Mat a = aij->A,b = aij->B; 1410 int ierr,s1,s2,s3; 1411 1412 PetscFunctionBegin; 1413 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1414 if 
(rr) { 1415 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1416 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1417 /* Overlap communication with computation. */ 1418 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1419 } 1420 if (ll) { 1421 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1422 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1423 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1424 } 1425 /* scale the diagonal block */ 1426 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1427 1428 if (rr) { 1429 /* Do a scatter end and then right scale the off-diagonal block */ 1430 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1431 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1432 } 1433 1434 PetscFunctionReturn(0); 1435 } 1436 1437 1438 #undef __FUNCT__ 1439 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1440 int MatPrintHelp_MPIAIJ(Mat A) 1441 { 1442 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1443 int ierr; 1444 1445 PetscFunctionBegin; 1446 if (!a->rank) { 1447 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1448 } 1449 PetscFunctionReturn(0); 1450 } 1451 1452 #undef __FUNCT__ 1453 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1454 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1455 { 1456 PetscFunctionBegin; 1457 *bs = 1; 1458 PetscFunctionReturn(0); 1459 } 1460 #undef __FUNCT__ 1461 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1462 int MatSetUnfactored_MPIAIJ(Mat A) 1463 { 1464 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1465 int ierr; 1466 1467 PetscFunctionBegin; 1468 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1469 PetscFunctionReturn(0); 1470 } 1471 1472 #undef __FUNCT__ 1473 #define __FUNCT__ "MatEqual_MPIAIJ" 1474 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1475 { 1476 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1477 Mat a,b,c,d; 1478 PetscTruth flg; 1479 int ierr; 1480 1481 PetscFunctionBegin; 1482 a = matA->A; b = matA->B; 1483 c = matB->A; d = matB->B; 1484 1485 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1486 if (flg == PETSC_TRUE) { 1487 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1488 } 1489 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1490 PetscFunctionReturn(0); 1491 } 1492 1493 #undef __FUNCT__ 1494 #define __FUNCT__ "MatCopy_MPIAIJ" 1495 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1496 { 1497 int ierr; 1498 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1499 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1500 1501 PetscFunctionBegin; 1502 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1503 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1504 /* because of the column compression in the off-processor part of the matrix a->B, 1505 the number of columns in a->B and b->B may be different, hence we cannot call 1506 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1507 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1508 then copying the submatrices */ 1509 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1510 } else { 1511 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1512 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1513 } 1514 PetscFunctionReturn(0); 1515 } 1516 1517 #undef __FUNCT__ 1518 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1519 int MatSetUpPreallocation_MPIAIJ(Mat A) 1520 { 1521 int ierr; 1522 1523 PetscFunctionBegin; 1524 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1525 PetscFunctionReturn(0); 1526 } 1527 1528 #include "petscblaslapack.h" 1529 #undef __FUNCT__ 1530 #define __FUNCT__ "MatAXPY_MPIAIJ" 1531 int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1532 { 1533 int ierr,one=1,i; 1534 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1535 Mat_SeqAIJ *x,*y; 1536 1537 PetscFunctionBegin; 1538 if (str == SAME_NONZERO_PATTERN) { 1539 x = (Mat_SeqAIJ *)xx->A->data; 1540 y = (Mat_SeqAIJ *)yy->A->data; 1541 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1542 x = (Mat_SeqAIJ *)xx->B->data; 1543 y = (Mat_SeqAIJ *)yy->B->data; 1544 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1545 } else if (str == SUBSET_NONZERO_PATTERN) { 1546 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1547 1548 x = (Mat_SeqAIJ *)xx->B->data; 1549 y = (Mat_SeqAIJ *)yy->B->data; 1550 if (y->xtoy && y->XtoY != xx->B) { 1551 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1552 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1553 } 1554 if (!y->xtoy) { /* get xtoy */ 1555 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1556 y->XtoY = xx->B; 1557 } 1558 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1559 } else { 1560 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1561 } 1562 PetscFunctionReturn(0); 1563 } 1564 1565 /* -------------------------------------------------------------------*/ 1566 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1567 MatGetRow_MPIAIJ, 1568 MatRestoreRow_MPIAIJ, 1569 MatMult_MPIAIJ, 1570 /* 4*/ MatMultAdd_MPIAIJ, 1571 MatMultTranspose_MPIAIJ, 1572 MatMultTransposeAdd_MPIAIJ, 1573 0, 1574 0, 1575 0, 1576 /*10*/ 0, 1577 0, 1578 0, 1579 MatRelax_MPIAIJ, 1580 MatTranspose_MPIAIJ, 1581 /*15*/ MatGetInfo_MPIAIJ, 1582 MatEqual_MPIAIJ, 1583 MatGetDiagonal_MPIAIJ, 1584 MatDiagonalScale_MPIAIJ, 1585 MatNorm_MPIAIJ, 1586 /*20*/ MatAssemblyBegin_MPIAIJ, 1587 MatAssemblyEnd_MPIAIJ, 1588 0, 1589 MatSetOption_MPIAIJ, 1590 MatZeroEntries_MPIAIJ, 1591 /*25*/ MatZeroRows_MPIAIJ, 1592 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1593 MatLUFactorSymbolic_MPIAIJ_TFS, 1594 #else 1595 0, 1596 #endif 1597 0, 1598 0, 1599 0, 1600 /*30*/ MatSetUpPreallocation_MPIAIJ, 1601 0, 1602 0, 1603 0, 1604 0, 1605 /*35*/ MatDuplicate_MPIAIJ, 1606 0, 1607 0, 1608 0, 1609 0, 1610 /*40*/ MatAXPY_MPIAIJ, 1611 MatGetSubMatrices_MPIAIJ, 1612 MatIncreaseOverlap_MPIAIJ, 1613 MatGetValues_MPIAIJ, 1614 MatCopy_MPIAIJ, 1615 /*45*/ MatPrintHelp_MPIAIJ, 1616 MatScale_MPIAIJ, 1617 0, 1618 0, 1619 0, 1620 /*50*/ MatGetBlockSize_MPIAIJ, 1621 0, 1622 0, 1623 0, 1624 0, 1625 /*55*/ MatFDColoringCreate_MPIAIJ, 1626 0, 1627 MatSetUnfactored_MPIAIJ, 1628 0, 1629 0, 1630 /*60*/ MatGetSubMatrix_MPIAIJ, 1631 MatDestroy_MPIAIJ, 1632 MatView_MPIAIJ, 1633 MatGetPetscMaps_Petsc, 1634 0, 1635 /*65*/ 0, 1636 0, 1637 0, 1638 0, 1639 0, 1640 /*70*/ 0, 1641 0, 1642 
MatSetColoring_MPIAIJ, 1643 MatSetValuesAdic_MPIAIJ, 1644 MatSetValuesAdifor_MPIAIJ, 1645 /*75*/ 0, 1646 0, 1647 0, 1648 0, 1649 0, 1650 /*80*/ 0, 1651 0, 1652 0, 1653 0, 1654 /*85*/ MatLoad_MPIAIJ}; 1655 1656 /* ----------------------------------------------------------------------------------------*/ 1657 1658 EXTERN_C_BEGIN 1659 #undef __FUNCT__ 1660 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1661 int MatStoreValues_MPIAIJ(Mat mat) 1662 { 1663 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1664 int ierr; 1665 1666 PetscFunctionBegin; 1667 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1668 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1669 PetscFunctionReturn(0); 1670 } 1671 EXTERN_C_END 1672 1673 EXTERN_C_BEGIN 1674 #undef __FUNCT__ 1675 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1676 int MatRetrieveValues_MPIAIJ(Mat mat) 1677 { 1678 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1679 int ierr; 1680 1681 PetscFunctionBegin; 1682 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1683 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1684 PetscFunctionReturn(0); 1685 } 1686 EXTERN_C_END 1687 1688 #include "petscpc.h" 1689 EXTERN_C_BEGIN 1690 #undef __FUNCT__ 1691 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1692 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 1693 { 1694 Mat_MPIAIJ *b; 1695 int ierr,i; 1696 1697 PetscFunctionBegin; 1698 B->preallocated = PETSC_TRUE; 1699 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1700 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1701 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1702 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1703 if (d_nnz) { 1704 for (i=0; i<B->m; i++) { 1705 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 1706 } 1707 } 1708 if (o_nnz) { 1709 for (i=0; i<B->m; i++) { 1710 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 1711 } 1712 } 1713 b = (Mat_MPIAIJ*)B->data; 1714 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1715 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1716 1717 PetscFunctionReturn(0); 1718 } 1719 EXTERN_C_END 1720 1721 /*MC 1722 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 1723 1724 Options Database Keys: 1725 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 1726 1727 Level: beginner 1728 1729 .seealso: MatCreateMPIAIJ 1730 M*/ 1731 1732 EXTERN_C_BEGIN 1733 #undef __FUNCT__ 1734 #define __FUNCT__ "MatCreate_MPIAIJ" 1735 int MatCreate_MPIAIJ(Mat B) 1736 { 1737 Mat_MPIAIJ *b; 1738 int ierr,i,size; 1739 1740 PetscFunctionBegin; 1741 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1742 1743 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1744 B->data = (void*)b; 1745 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1746 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1747 B->factor = 0; 1748 B->assembled = PETSC_FALSE; 1749 B->mapping = 0; 1750 1751 B->insertmode = NOT_SET_VALUES; 1752 b->size = size; 1753 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1754 1755 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1756 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1757 1758 /* the information in the maps duplicates the information computed below, eventually 1759 we should remove the duplicate information that is not contained in the maps */ 1760 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1761 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1762 1763 /* build local table of row and column ownerships */ 1764 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1765 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1766 b->cowners = b->rowners + b->size + 2; 1767 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1768 b->rowners[0] = 0; 1769 for (i=2; i<=b->size; i++) { 1770 b->rowners[i] += b->rowners[i-1]; 1771 } 1772 b->rstart = b->rowners[b->rank]; 1773 b->rend = b->rowners[b->rank+1]; 1774 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1775 b->cowners[0] = 0; 1776 for (i=2; i<=b->size; i++) { 1777 b->cowners[i] += b->cowners[i-1]; 1778 } 1779 b->cstart = b->cowners[b->rank]; 1780 b->cend = b->cowners[b->rank+1]; 1781 1782 /* build cache for off array entries formed */ 1783 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1784 b->donotstash = PETSC_FALSE; 1785 b->colmap = 0; 1786 b->garray = 0; 1787 b->roworiented = PETSC_TRUE; 1788 1789 /* stuff used for matrix vector multiply */ 1790 b->lvec = PETSC_NULL; 1791 b->Mvctx = PETSC_NULL; 1792 1793 /* stuff for MatGetRow() */ 1794 b->rowindices = 0; 1795 b->rowvalues = 0; 1796 b->getrowactive = PETSC_FALSE; 1797 1798 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 1799 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 1800 PetscLogObjectParent(B,b->A); 1801 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 1802 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 1803 PetscLogObjectParent(B,b->B); 1804 1805 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1806 "MatStoreValues_MPIAIJ", 1807 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1808 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1809 "MatRetrieveValues_MPIAIJ", 1810 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1811 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1812 "MatGetDiagonalBlock_MPIAIJ", 1813 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1814 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 1815 "MatIsTranspose_MPIAIJ", 1816 
MatIsTranspose_MPIAIJ); CHKERRQ(ierr); 1817 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 1818 "MatMPIAIJSetPreallocation_MPIAIJ", 1819 MatMPIAIJSetPreallocation_MPIAIJ); CHKERRQ(ierr); 1820 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 1821 "MatDiagonalScaleLocal_MPIAIJ", 1822 MatDiagonalScaleLocal_MPIAIJ); CHKERRQ(ierr); 1823 PetscFunctionReturn(0); 1824 } 1825 EXTERN_C_END 1826 1827 /*MC 1828 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 1829 1830 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 1831 and MATMPIAIJ otherwise. 1832 1833 Options Database Keys: 1834 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 1835 1836 Level: beginner 1837 1838 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ 1839 M*/ 1840 1841 EXTERN_C_BEGIN 1842 #undef __FUNCT__ 1843 #define __FUNCT__ "MatCreate_AIJ" 1844 int MatCreate_AIJ(Mat A) { 1845 int ierr,size; 1846 1847 PetscFunctionBegin; 1848 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr); 1849 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1850 if (size == 1) { 1851 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1852 } else { 1853 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1854 } 1855 PetscFunctionReturn(0); 1856 } 1857 EXTERN_C_END 1858 1859 #undef __FUNCT__ 1860 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1861 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1862 { 1863 Mat mat; 1864 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1865 int ierr; 1866 1867 PetscFunctionBegin; 1868 *newmat = 0; 1869 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1870 ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1871 a = (Mat_MPIAIJ*)mat->data; 1872 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1873 mat->factor = matin->factor; 1874 mat->assembled = PETSC_TRUE; 1875 mat->insertmode = NOT_SET_VALUES; 1876 mat->preallocated = PETSC_TRUE; 1877 1878 a->rstart = oldmat->rstart; 1879 a->rend = oldmat->rend; 1880 a->cstart = oldmat->cstart; 1881 a->cend = oldmat->cend; 1882 a->size = oldmat->size; 1883 a->rank = oldmat->rank; 1884 a->donotstash = oldmat->donotstash; 1885 a->roworiented = oldmat->roworiented; 1886 a->rowindices = 0; 1887 a->rowvalues = 0; 1888 a->getrowactive = PETSC_FALSE; 1889 1890 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1891 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1892 if (oldmat->colmap) { 1893 #if defined (PETSC_USE_CTABLE) 1894 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1895 #else 1896 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1897 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1898 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1899 #endif 1900 } else a->colmap = 0; 1901 if (oldmat->garray) { 1902 int len; 1903 len = oldmat->B->n; 1904 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1905 PetscLogObjectMemory(mat,len*sizeof(int)); 1906 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1907 } else a->garray = 0; 1908 1909 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1910 PetscLogObjectParent(mat,a->lvec); 1911 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1912 PetscLogObjectParent(mat,a->Mvctx); 1913 
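/* duplicate the local diagonal (A) and off-diagonal (B) sequential blocks; cpvalues controls whether the numerical values are copied along with the nonzero structure */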
ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1914 PetscLogObjectParent(mat,a->A); 1915 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1916 PetscLogObjectParent(mat,a->B); 1917 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1918 *newmat = mat; 1919 PetscFunctionReturn(0); 1920 } 1921 1922 #include "petscsys.h" 1923 1924 #undef __FUNCT__ 1925 #define __FUNCT__ "MatLoad_MPIAIJ" 1926 int MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1927 { 1928 Mat A; 1929 PetscScalar *vals,*svals; 1930 MPI_Comm comm = ((PetscObject)viewer)->comm; 1931 MPI_Status status; 1932 int i,nz,ierr,j,rstart,rend,fd; 1933 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1934 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1935 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1936 1937 PetscFunctionBegin; 1938 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1939 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1940 if (!rank) { 1941 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1942 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1943 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1944 if (header[3] < 0) { 1945 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1946 } 1947 } 1948 1949 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1950 M = header[1]; N = header[2]; 1951 /* determine ownership of all rows */ 1952 m = M/size + ((M % size) > rank); 1953 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1954 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1955 rowners[0] = 0; 1956 for (i=2; i<=size; i++) { 1957 rowners[i] += rowners[i-1]; 1958 } 1959 rstart = rowners[rank]; 1960 rend = rowners[rank+1]; 1961 1962 /* distribute row lengths to all processors */ 1963 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1964 offlens = ourlens + (rend-rstart); 1965 if (!rank) { 1966 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1967 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1968 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1969 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1970 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1971 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1972 } else { 1973 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1974 } 1975 1976 if (!rank) { 1977 /* calculate the number of nonzeros on each processor */ 1978 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1979 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1980 for (i=0; i<size; i++) { 1981 for (j=rowners[i]; j< rowners[i+1]; j++) { 1982 procsnz[i] += rowlengths[j]; 1983 } 1984 } 1985 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1986 1987 /* determine max buffer needed and allocate it */ 1988 maxnz = 0; 1989 for (i=0; i<size; i++) { 1990 maxnz = PetscMax(maxnz,procsnz[i]); 1991 } 1992 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1993 1994 /* read in my part of the matrix column indices */ 1995 nz = procsnz[0]; 1996 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1997 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1998 1999 /* read in every one elses and ship off */ 2000 for (i=1; i<size; i++) { 2001 nz = 
procsnz[i]; 2002 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2003 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2004 } 2005 ierr = PetscFree(cols);CHKERRQ(ierr); 2006 } else { 2007 /* determine buffer space needed for message */ 2008 nz = 0; 2009 for (i=0; i<m; i++) { 2010 nz += ourlens[i]; 2011 } 2012 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 2013 2014 /* receive message of column indices*/ 2015 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2016 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2017 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2018 } 2019 2020 /* determine column ownership if matrix is not square */ 2021 if (N != M) { 2022 n = N/size + ((N % size) > rank); 2023 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2024 cstart = cend - n; 2025 } else { 2026 cstart = rstart; 2027 cend = rend; 2028 n = cend - cstart; 2029 } 2030 2031 /* loop over local rows, determining number of off diagonal entries */ 2032 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 2033 jj = 0; 2034 for (i=0; i<m; i++) { 2035 for (j=0; j<ourlens[i]; j++) { 2036 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2037 jj++; 2038 } 2039 } 2040 2041 /* create our matrix */ 2042 for (i=0; i<m; i++) { 2043 ourlens[i] -= offlens[i]; 2044 } 2045 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2046 ierr = MatSetType(A,type);CHKERRQ(ierr); 2047 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2048 2049 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2050 for (i=0; i<m; i++) { 2051 ourlens[i] += offlens[i]; 2052 } 2053 2054 if (!rank) { 2055 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2056 2057 /* read in my part of the matrix numerical values */ 2058 nz = procsnz[0]; 2059 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2060 2061 /* insert into matrix */ 2062 jj = rstart; 2063 smycols = mycols; 2064 svals = vals; 2065 for (i=0; i<m; i++) { 2066 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2067 smycols += ourlens[i]; 2068 svals += ourlens[i]; 2069 jj++; 2070 } 2071 2072 /* read in other processors and ship out */ 2073 for (i=1; i<size; i++) { 2074 nz = procsnz[i]; 2075 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2076 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2077 } 2078 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2079 } else { 2080 /* receive numeric values */ 2081 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2082 2083 /* receive message of values*/ 2084 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2085 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2086 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2087 2088 /* insert into matrix */ 2089 jj = rstart; 2090 smycols = mycols; 2091 svals = vals; 2092 for (i=0; i<m; i++) { 2093 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2094 smycols += ourlens[i]; 2095 svals += ourlens[i]; 2096 jj++; 2097 } 2098 } 2099 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2100 ierr = PetscFree(vals);CHKERRQ(ierr); 2101 ierr = PetscFree(mycols);CHKERRQ(ierr); 2102 ierr = PetscFree(rowners);CHKERRQ(ierr); 2103 2104 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2105 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2106 *newmat = A; 2107 
PetscFunctionReturn(0); 2108 } 2109 2110 #undef __FUNCT__ 2111 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2112 /* 2113 Not great since it makes two copies of the submatrix, first an SeqAIJ 2114 in local and then by concatenating the local matrices the end result. 2115 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2116 */ 2117 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2118 { 2119 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2120 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2121 Mat *local,M,Mreuse; 2122 PetscScalar *vwork,*aa; 2123 MPI_Comm comm = mat->comm; 2124 Mat_SeqAIJ *aij; 2125 2126 2127 PetscFunctionBegin; 2128 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2129 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2130 2131 if (call == MAT_REUSE_MATRIX) { 2132 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2133 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 2134 local = &Mreuse; 2135 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2136 } else { 2137 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2138 Mreuse = *local; 2139 ierr = PetscFree(local);CHKERRQ(ierr); 2140 } 2141 2142 /* 2143 m - number of local rows 2144 n - number of columns (same on all processors) 2145 rstart - first row in new global matrix generated 2146 */ 2147 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2148 if (call == MAT_INITIAL_MATRIX) { 2149 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2150 ii = aij->i; 2151 jj = aij->j; 2152 2153 /* 2154 Determine the number of non-zeros in the diagonal and off-diagonal 2155 portions of the matrix in order to do correct preallocation 2156 */ 2157 2158 /* first get start and end of "diagonal" columns */ 2159 if (csize == PETSC_DECIDE) { 2160 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2161 if (mglobal == n) { /* square matrix */ 2162 nlocal = m; 2163 } else { 2164 nlocal = n/size + ((n % size) > rank); 2165 } 2166 } else { 2167 nlocal = csize; 2168 } 2169 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2170 rstart = rend - nlocal; 2171 if (rank == size - 1 && rend != n) { 2172 SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n); 2173 } 2174 2175 /* next, compute all the lengths */ 2176 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2177 olens = dlens + m; 2178 for (i=0; i<m; i++) { 2179 jend = ii[i+1] - ii[i]; 2180 olen = 0; 2181 dlen = 0; 2182 for (j=0; j<jend; j++) { 2183 if (*jj < rstart || *jj >= rend) olen++; 2184 else dlen++; 2185 jj++; 2186 } 2187 olens[i] = olen; 2188 dlens[i] = dlen; 2189 } 2190 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2191 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2192 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2193 ierr = PetscFree(dlens);CHKERRQ(ierr); 2194 } else { 2195 int ml,nl; 2196 2197 M = *newmat; 2198 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2199 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2200 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2201 /* 2202 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2203 rather than the slower MatSetValues(). 
2204 */ 2205 M->was_assembled = PETSC_TRUE; 2206 M->assembled = PETSC_FALSE; 2207 } 2208 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2209 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2210 ii = aij->i; 2211 jj = aij->j; 2212 aa = aij->a; 2213 for (i=0; i<m; i++) { 2214 row = rstart + i; 2215 nz = ii[i+1] - ii[i]; 2216 cwork = jj; jj += nz; 2217 vwork = aa; aa += nz; 2218 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2219 } 2220 2221 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2222 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2223 *newmat = M; 2224 2225 /* save submatrix used in processor for next request */ 2226 if (call == MAT_INITIAL_MATRIX) { 2227 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2228 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2229 } 2230 2231 PetscFunctionReturn(0); 2232 } 2233 2234 #undef __FUNCT__ 2235 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2236 /*@C 2237 MatMPIAIJSetPreallocation - Preallocates storage for a sparse parallel matrix in AIJ format 2238 (the default parallel PETSc format). For good matrix assembly performance 2239 the user should preallocate the matrix storage by setting the parameters 2240 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2241 performance can be increased by more than a factor of 50. 2242 2243 Collective on MPI_Comm 2244 2245 Input Parameters: 2246 + A - the matrix 2247 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2248 (same value is used for all local rows) 2249 . d_nnz - array containing the number of nonzeros in the various rows of the 2250 DIAGONAL portion of the local submatrix (possibly different for each row) 2251 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2252 The size of this array is equal to the number of local rows, i.e. 'm'. 2253 You must leave room for the diagonal entry even if it is zero. 2254 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2255 submatrix (same value is used for all local rows). 2256 - o_nnz - array containing the number of nonzeros in the various rows of the 2257 OFF-DIAGONAL portion of the local submatrix (possibly different for 2258 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2259 structure. The size of this array is equal to the number 2260 of local rows, i.e. 'm'. 2261 2262 The AIJ format (also called the Yale sparse matrix format or 2263 compressed row storage) is fully compatible with standard Fortran 77 2264 storage. That is, the stored row and column indices can begin at 2265 either one (as in Fortran) or zero. See the users manual for details. 2266 2267 The user MUST specify either the local or global matrix dimensions 2268 (possibly both). 2269 2270 The parallel matrix is partitioned such that the first m0 rows belong to 2271 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2272 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 2273 2274 The DIAGONAL portion of the local submatrix of a processor can be defined 2275 as the submatrix which is obtained by extracting the part corresponding 2276 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2277 first row that belongs to the processor, and r2 is the last row belonging 2278 to this processor. This is a square mxm matrix. The remaining portion 2279 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2280 2281 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2282 2283 By default, this format uses inodes (identical nodes) when possible. 2284 We search for consecutive rows with the same nonzero structure, thereby 2285 reusing matrix information to achieve increased efficiency. 2286 2287 Options Database Keys: 2288 + -mat_aij_no_inode - Do not use inodes 2289 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2290 - -mat_aij_oneindex - Internally use indexing starting at 1 2291 rather than 0. Note that when calling MatSetValues(), 2292 the user still MUST index entries starting at 0! 2293 2294 Example usage: 2295 2296 Consider the following 8x8 matrix with 34 non-zero values that is 2297 assembled across 3 processors. Let us assume that proc0 owns 3 rows, 2298 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 2299 as follows: 2300 2301 .vb 2302 1 2 0 | 0 3 0 | 0 4 2303 Proc0 0 5 6 | 7 0 0 | 8 0 2304 9 0 10 | 11 0 0 | 12 0 2305 ------------------------------------- 2306 13 0 14 | 15 16 17 | 0 0 2307 Proc1 0 18 0 | 19 20 21 | 0 0 2308 0 0 0 | 22 23 0 | 24 0 2309 ------------------------------------- 2310 Proc2 25 26 27 | 0 0 28 | 29 0 2311 30 0 0 | 31 32 33 | 0 34 2312 .ve 2313 2314 This can be represented as a collection of submatrices as: 2315 2316 .vb 2317 A B C 2318 D E F 2319 G H I 2320 .ve 2321 2322 Where the submatrices A,B,C are owned by proc0, D,E,F are 2323 owned by proc1, G,H,I are owned by proc2. 2324 2325 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2326 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2327 The 'M','N' parameters are 8,8, and have the same values on all procs. 2328 2329 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2330 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2331 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2332 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2333 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 2334 matrix, and [DF] as another SeqAIJ matrix. 2335 2336 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2337 allocated for every row of the local diagonal submatrix, and o_nz 2338 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2339 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2340 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 2341 In this case, the values of d_nz,o_nz are: 2342 .vb 2343 proc0 : d_nz = 2, o_nz = 2 2344 proc1 : d_nz = 3, o_nz = 2 2345 proc2 : d_nz = 1, o_nz = 4 2346 .ve 2347 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2348 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 2349 for proc2, i.e. we are using 12+15+10=37 storage locations to store 2350 34 values. 2351 2352 When the d_nnz, o_nnz parameters are specified, the storage is specified 2353 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2354 In the above case the values for d_nnz,o_nnz are: 2355 .vb 2356 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2357 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2358 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2359 .ve 2360 Here the space allocated is the sum of all the above values, i.e. 34, and 2361 hence the preallocation is perfect.
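   A sketch of the corresponding calling sequence on each process might look as follows
   (the d_nnz and o_nnz arrays are the per-process values listed above; error checking
   and the local/global sizes m,n,M,N are assumed to be set up by the caller):
.vb
      Mat A;
      ierr = MatCreate(comm,m,n,M,N,&A);
      ierr = MatSetType(A,MATMPIAIJ);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve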
2362 2363 Level: intermediate 2364 2365 .keywords: matrix, aij, compressed row, sparse, parallel 2366 2367 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2368 @*/ 2369 int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 2370 { 2371 int ierr,(*f)(Mat,int,const int[],int,const int[]); 2372 2373 PetscFunctionBegin; 2374 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2375 if (f) { 2376 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2377 } 2378 PetscFunctionReturn(0); 2379 } 2380 2381 #undef __FUNCT__ 2382 #define __FUNCT__ "MatCreateMPIAIJ" 2383 /*@C 2384 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2385 (the default parallel PETSc format). For good matrix assembly performance 2386 the user should preallocate the matrix storage by setting the parameters 2387 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2388 performance can be increased by more than a factor of 50. 2389 2390 Collective on MPI_Comm 2391 2392 Input Parameters: 2393 + comm - MPI communicator 2394 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2395 This value should be the same as the local size used in creating the 2396 y vector for the matrix-vector product y = Ax. 2397 . n - This value should be the same as the local size used in creating the 2398 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2399 calculated if N is given) For square matrices n is almost always m. 2400 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2401 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2402 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2403 (same value is used for all local rows) 2404 . d_nnz - array containing the number of nonzeros in the various rows of the 2405 DIAGONAL portion of the local submatrix (possibly different for each row) 2406 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2407 The size of this array is equal to the number of local rows, i.e 'm'. 2408 You must leave room for the diagonal entry even if it is zero. 2409 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2410 submatrix (same value is used for all local rows). 2411 - o_nnz - array containing the number of nonzeros in the various rows of the 2412 OFF-DIAGONAL portion of the local submatrix (possibly different for 2413 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2414 structure. The size of this array is equal to the number 2415 of local rows, i.e 'm'. 2416 2417 Output Parameter: 2418 . A - the matrix 2419 2420 Notes: 2421 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2422 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2423 storage requirements for this matrix. 2424 2425 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2426 processor than it must be used on all processors that share the object for 2427 that argument. 2428 2429 The AIJ format (also called the Yale sparse matrix format or 2430 compressed row storage), is fully compatible with standard Fortran 77 2431 storage. That is, the stored row and column indices can begin at 2432 either one (as in Fortran) or zero. See the users manual for details. 2433 2434 The user MUST specify either the local or global matrix dimensions 2435 (possibly both). 
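   For example, a typical call that specifies only the global dimensions and lets PETSc
   choose the local sizes might look like the sketch below (the fill estimates 5 and 2
   are illustrative placeholders, not recommended values):
.vb
      Mat A;
      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);
.ve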
2436 2437 The parallel matrix is partitioned such that the first m0 rows belong to 2438 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2439 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2440 2441 The DIAGONAL portion of the local submatrix of a processor can be defined 2442 as the submatrix which is obtained by extraction the part corresponding 2443 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2444 first row that belongs to the processor, and r2 is the last row belonging 2445 to the this processor. This is a square mxm matrix. The remaining portion 2446 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2447 2448 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2449 2450 When calling this routine with a single process communicator, a matrix of 2451 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2452 type of communicator, use the construction mechanism: 2453 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2454 2455 By default, this format uses inodes (identical nodes) when possible. 2456 We search for consecutive rows with the same nonzero structure, thereby 2457 reusing matrix information to achieve increased efficiency. 2458 2459 Options Database Keys: 2460 + -mat_aij_no_inode - Do not use inodes 2461 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2462 - -mat_aij_oneindex - Internally use indexing starting at 1 2463 rather than 0. Note that when calling MatSetValues(), 2464 the user still MUST index entries starting at 0! 2465 2466 2467 Example usage: 2468 2469 Consider the following 8x8 matrix with 34 non-zero values, that is 2470 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2471 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2472 as follows: 2473 2474 .vb 2475 1 2 0 | 0 3 0 | 0 4 2476 Proc0 0 5 6 | 7 0 0 | 8 0 2477 9 0 10 | 11 0 0 | 12 0 2478 ------------------------------------- 2479 13 0 14 | 15 16 17 | 0 0 2480 Proc1 0 18 0 | 19 20 21 | 0 0 2481 0 0 0 | 22 23 0 | 24 0 2482 ------------------------------------- 2483 Proc2 25 26 27 | 0 0 28 | 29 0 2484 30 0 0 | 31 32 33 | 0 34 2485 .ve 2486 2487 This can be represented as a collection of submatrices as: 2488 2489 .vb 2490 A B C 2491 D E F 2492 G H I 2493 .ve 2494 2495 Where the submatrices A,B,C are owned by proc0, D,E,F are 2496 owned by proc1, G,H,I are owned by proc2. 2497 2498 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2499 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2500 The 'M','N' parameters are 8,8, and have the same values on all procs. 2501 2502 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2503 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2504 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2505 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2506 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2507 matrix, ans [DF] as another SeqAIJ matrix. 2508 2509 When d_nz, o_nz parameters are specified, d_nz storage elements are 2510 allocated for every row of the local diagonal submatrix, and o_nz 2511 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2512 One way to choose d_nz and o_nz is to use the max nonzerors per local 2513 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 
2514 In this case, the values of d_nz,o_nz are: 2515 .vb 2516 proc0 : dnz = 2, o_nz = 2 2517 proc1 : dnz = 3, o_nz = 2 2518 proc2 : dnz = 1, o_nz = 4 2519 .ve 2520 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2521 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2522 for proc3. i.e we are using 12+15+10=37 storage locations to store 2523 34 values. 2524 2525 When d_nnz, o_nnz parameters are specified, the storage is specified 2526 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2527 In the above case the values for d_nnz,o_nnz are: 2528 .vb 2529 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2530 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2531 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2532 .ve 2533 Here the space allocated is sum of all the above values i.e 34, and 2534 hence pre-allocation is perfect. 2535 2536 Level: intermediate 2537 2538 .keywords: matrix, aij, compressed row, sparse, parallel 2539 2540 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2541 @*/ 2542 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 2543 { 2544 int ierr,size; 2545 2546 PetscFunctionBegin; 2547 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2548 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2549 if (size > 1) { 2550 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2551 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2552 } else { 2553 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2554 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2555 } 2556 PetscFunctionReturn(0); 2557 } 2558 2559 #undef __FUNCT__ 2560 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2561 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[]) 2562 { 2563 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2564 PetscFunctionBegin; 2565 *Ad = a->A; 2566 *Ao = a->B; 2567 *colmap = a->garray; 2568 PetscFunctionReturn(0); 2569 } 2570 2571 #undef __FUNCT__ 2572 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2573 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2574 { 2575 int ierr,i; 2576 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2577 2578 PetscFunctionBegin; 2579 if (coloring->ctype == IS_COLORING_LOCAL) { 2580 ISColoringValue *allcolors,*colors; 2581 ISColoring ocoloring; 2582 2583 /* set coloring for diagonal portion */ 2584 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2585 2586 /* set coloring for off-diagonal portion */ 2587 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2588 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2589 for (i=0; i<a->B->n; i++) { 2590 colors[i] = allcolors[a->garray[i]]; 2591 } 2592 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2593 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2594 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2595 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2596 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2597 ISColoringValue *colors; 2598 int *larray; 2599 ISColoring ocoloring; 2600 2601 /* set coloring for diagonal portion */ 2602 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2603 for (i=0; i<a->A->n; i++) { 2604 larray[i] = i + a->cstart; 2605 } 2606 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2607 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2608 for 
(i=0; i<a->A->n; i++) { 2609 colors[i] = coloring->colors[larray[i]]; 2610 } 2611 ierr = PetscFree(larray);CHKERRQ(ierr); 2612 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2613 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2614 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2615 2616 /* set coloring for off-diagonal portion */ 2617 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2618 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2619 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2620 for (i=0; i<a->B->n; i++) { 2621 colors[i] = coloring->colors[larray[i]]; 2622 } 2623 ierr = PetscFree(larray);CHKERRQ(ierr); 2624 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2625 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2626 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2627 } else { 2628 SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype); 2629 } 2630 2631 PetscFunctionReturn(0); 2632 } 2633 2634 #undef __FUNCT__ 2635 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2636 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2637 { 2638 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2639 int ierr; 2640 2641 PetscFunctionBegin; 2642 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2643 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2644 PetscFunctionReturn(0); 2645 } 2646 2647 #undef __FUNCT__ 2648 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2649 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2650 { 2651 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2652 int ierr; 2653 2654 PetscFunctionBegin; 2655 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2656 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2657 PetscFunctionReturn(0); 2658 } 2659 2660 #undef __FUNCT__ 2661 #define __FUNCT__ "MatMerge" 2662 /*@C 2663 MatMerge - Creates a single large parallel PETSc matrix by concatenating the sequential 2664 matrices from each processor 2665 2666 Collective on MPI_Comm 2667 2668 Input Parameters: 2669 + comm - the communicator the parallel matrix will live on 2670 - inmat - the input sequential matrix on this process 2671 2672 Output Parameter: 2673 . outmat - the parallel matrix generated 2674 2675 Level: advanced 2676 2677 Notes: The number of columns of the input matrix MUST be the same 2678 on EACH process.
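   For example, each process might assemble its local rows as a SeqAIJ matrix and then
   merge them into a single parallel matrix.  A sketch (mlocal and N are illustrative
   sizes supplied by the caller, and error checking is omitted):
.vb
      Mat seqmat,parmat;
      ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,N,0,PETSC_NULL,&seqmat);
      ... set values in seqmat with MatSetValues() and assemble it ...
      ierr = MatMerge(PETSC_COMM_WORLD,seqmat,&parmat);
.ve
   Note that MatMerge() destroys the input matrix inmat as part of the merge.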
2679 2680 @*/ 2681 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat) 2682 { 2683 int ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz; 2684 PetscScalar *values; 2685 PetscMap columnmap,rowmap; 2686 2687 PetscFunctionBegin; 2688 2689 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr); 2690 2691 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2692 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2693 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr); 2694 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2695 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2696 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2697 2698 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2699 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2700 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2701 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2702 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2703 2704 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2705 for (i=0;i<m;i++) { 2706 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2707 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2708 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2709 } 2710 ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr); 2711 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2712 2713 for (i=0;i<m;i++) { 2714 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2715 I = i + rstart; 2716 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2717 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2718 } 2719 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2720 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2721 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2722 2723 PetscFunctionReturn(0); 2724 } 2725 2726 #undef __FUNCT__ 2727 #define __FUNCT__ "MatFileSplit" 2728 int MatFileSplit(Mat A,char *outfile) 2729 { 2730 int ierr,rank,len,m,N,i,rstart,*indx,nnz; 2731 PetscViewer out; 2732 char *name; 2733 Mat B; 2734 PetscScalar *values; 2735 2736 PetscFunctionBegin; 2737 2738 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2739 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2740 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr); 2741 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2742 for (i=0;i<m;i++) { 2743 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2744 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2745 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2746 } 2747 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2748 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2749 2750 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2751 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2752 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2753 sprintf(name,"%s.%d",outfile,rank); 2754 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2755 ierr = PetscFree(name); 2756 ierr = MatView(B,out);CHKERRQ(ierr); 2757 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2758 ierr = MatDestroy(B);CHKERRQ(ierr); 2759 PetscFunctionReturn(0); 2760 } 2761
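/*
   Illustrative sketch (not compiled into the library): loading a matrix stored in PETSc
   binary format as a parallel MPIAIJ matrix, then writing one binary file per process
   with MatFileSplit().  Here "fd" is assumed to be a binary PetscViewer already opened
   for reading, and "matrix.split" is an illustrative output file name.

      Mat A;
      int ierr;
      ierr = MatLoad(fd,MATMPIAIJ,&A);CHKERRQ(ierr);
      ierr = MatFileSplit(A,"matrix.split");CHKERRQ(ierr);
      ierr = MatDestroy(A);CHKERRQ(ierr);
*/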