/*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each
  processor has an order N integer array) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
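
/*
   Usage sketch (illustrative only, not part of the original source): once the
   colmap exists, a global column number gcol is translated to a local column
   of the off-diagonal block B as below.  Entries are stored shifted by one so
   that 0 (array case) or a failed lookup (table case) means "not present",
   mirroring the lookups done in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ().
*/
#if 0
static int GlobalToLocalOffDiagSketch(Mat_MPIAIJ *aij,int gcol,int *lcol)
{
  int ierr;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableFind(aij->colmap,gcol+1,lcol);CHKERRQ(ierr);
  (*lcol)--;                       /* -1 means gcol is not in this block */
#else
  *lcol = aij->colmap[gcol] - 1;   /* -1 means gcol is not in this block */
#endif
  return 0;
}
#endif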

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}
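
/*
   The two macros above share one algorithm: search for the column in a sorted
   CSR row, update in place if it is already present, otherwise shift the tail
   of the row and insert (enlarging the row storage by CHUNKSIZE when full).
   Below is a condensed, self-contained sketch of the insert path (without the
   reallocation branch), for reference only; it is not part of the original
   source and assumes the caller guarantees room for one more entry.
*/
#if 0
static void RowInsertSketch(int *rp,PetscScalar *ap,int *nrow,int col,PetscScalar value,InsertMode addv)
{
  int i,k,n = *nrow;
  for (i=0; i<n; i++) {
    if (rp[i] > col) break;           /* insertion point found */
    if (rp[i] == col) {               /* already present: combine and return */
      if (addv == ADD_VALUES) ap[i] += value;
      else                    ap[i]  = value;
      return;
    }
  }
  for (k=n-1; k>=i; k--) {            /* shift later entries up one slot */
    rp[k+1] = rp[k]; ap[k+1] = ap[k];
  }
  rp[i] = col; ap[i] = value; *nrow = n + 1;
}
#endif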

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar value;
  int         ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int         cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth  roworiented = aij->roworiented;

  /* Some variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  int         *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar *aa = a->a;
  PetscTruth  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat         B = aij->B;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  int         *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar *ba = b->a;

  int         *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  int         nonew = a->nonew,shift = 0;
  PetscScalar *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
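
/*
   Usage sketch (illustrative only): entries whose global column falls in
   [cstart,cend) go into the diagonal block A, other owned-row entries go into
   the off-diagonal block B, and rows owned elsewhere are stashed for assembly.
   The matrix 'mat' is assumed to have been created with MatCreateMPIAIJ().
*/
#if 0
{
  Mat         mat;
  int         ierr,rstart,rend,cols[2];
  PetscScalar vals[2] = {2.0,-1.0};

  ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
  cols[0] = rstart; cols[1] = rstart+1;   /* assumes rstart+1 < global N */
  ierr = MatSetValues(mat,1,&rstart,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
}
#endif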

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int        cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,nstash,reallocs;
  InsertMode addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)aij->A->data,*b = (Mat_SeqAIJ*)aij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}
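
/*
   Sketch (illustrative only): a process may add into a row owned by another
   process; the value travels through the stash machinery driven by the two
   assembly routines above.  All processes must call the assembly routines
   collectively.
*/
#if 0
{
  Mat         mat;                /* assumed MPIAIJ, global row 7 exists */
  int         ierr,rank,row = 7,col = 7;
  PetscScalar one = 1.0;

  ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = MatSetValues(mat,1,&row,1,&col,&one,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
}
#endif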

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIAIJ  *l = (Mat_MPIAIJ*)A->data;
  int         i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int         *nprocs,j,idx,nsends,row;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values,rstart = l->rstart;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;
  PetscTruth  found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory and slow things down with many mallocs.

     Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
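
/*
   Usage sketch (illustrative only): zero a few global rows and place 1.0 on
   their diagonals, e.g. to impose Dirichlet boundary conditions; the rows
   named here may be owned by any process.
*/
#if 0
{
  Mat         A;                  /* assumed square MPIAIJ */
  IS          is;
  int         ierr,rows[2] = {0,1};
  PetscScalar diag = 1.0;

  ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is);CHKERRQ(ierr);
  ierr = MatZeroRows(A,is,&diag);CHKERRQ(ierr);
  ierr = ISDestroy(is);CHKERRQ(ierr);
}
#endif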

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation */
  /* but may not always be true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
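
/*
   Usage sketch (illustrative only): y = A*x with an MPIAIJ matrix.  As coded
   in MatMult_MPIAIJ() above, the scatter of the ghost values of x overlaps
   the multiply by the diagonal block, and the off-diagonal block is applied
   with a multadd once the scatter completes.
*/
#if 0
{
  Mat A; Vec x,y;   /* assumed created with compatible parallel layouts */
  int ierr;
  ierr = MatMult(A,x,y);CHKERRQ(ierr);
}
#endif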

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsSymmetric_MPIAIJ"
int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f)
{
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)Amat->data,*Bij;
  Mat        Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS         Me,Notme;
  int        M,N,first,last,*notme,i,ierr;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsSymmetric(Adia,Bdia,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(int),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsSymmetric(Aoff,Boff,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation */
  /* but may not always be true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ  *B = (Mat_SeqAIJ*)aij->B->data;
  int         nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
  int         nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (rank == 0) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs as much space as the largest amount any processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
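
/*
   Usage sketch (illustrative only): dump the parallel matrix in PETSc binary
   format; ranks other than 0 ship their pieces to rank 0 as in the routine
   above.  The open-mode flag name is an assumption and varies across PETSc
   releases of this vintage.
*/
#if 0
{
  Mat         A;
  PetscViewer viewer;
  int         ierr;

  ierr = PetscViewerBinaryOpen(A->comm,"matrix.dat",PETSC_FILE_CREATE,&viewer);CHKERRQ(ierr);
  ierr = MatView(A,viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
}
#endif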

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  int               ierr,rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,isascii,flg,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (isascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  int         ierr;
  Vec         bb1;
  PetscScalar mone = -1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Mat        A = mat->A,B = mat->B;
  int        ierr;
  PetscReal  isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}
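
/*
   Usage sketch (illustrative only): query nonzero counts summed over all
   processes after assembly, as computed by MatGetInfo_MPIAIJ() above.
*/
#if 0
{
  Mat     A;
  MatInfo info;
  int     ierr;

  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(A->comm,"nz used %d, nz allocated %d, mallocs %d\n",
                     (int)info.nz_used,(int)info.nz_allocated,(int)info.mallocs);CHKERRQ(ierr);
}
#endif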

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int         i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int         nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int         *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /* allocate enough space to hold information from the longest row. */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int        max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  int         ierr,i,j,cstart = aij->cstart;
  PetscReal   sum = 0.0;
  PetscScalar *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      int       *jj,*garray = aij->garray;
      ierr  = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr  = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
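
/*
   Usage sketch (illustrative only): the three norms supported in parallel;
   NORM_2 is refused by MatNorm_MPIAIJ() above.
*/
#if 0
{
  Mat       A;
  PetscReal nrm1,nrmf,nrminf;
  int       ierr;

  ierr = MatNorm(A,NORM_1,&nrm1);CHKERRQ(ierr);          /* max column sum */
  ierr = MatNorm(A,NORM_FROBENIUS,&nrmf);CHKERRQ(ierr);
  ierr = MatNorm(A,NORM_INFINITY,&nrminf);CHKERRQ(ierr); /* max row sum */
}
#endif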

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ  *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ  *Aloc = (Mat_SeqAIJ*)a->A->data;
  int         ierr;
  int         M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat         B;
  PetscScalar *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for (i=0; i<ai[m]; i++) {aj[i] += a->cstart;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
  ct   = cols;
  for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        a = aij->A,b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
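
/*
   Usage sketch (illustrative only): form diag(ll)*A*diag(rr); ll is laid out
   like A's rows and rr like A's columns, and either argument may be
   PETSC_NULL to skip that side, matching MatDiagonalScale_MPIAIJ() above.
*/
#if 0
{
  Mat A; Vec ll,rr;   /* assumed created with conforming local sizes */
  int ierr;
  ierr = MatDiagonalScale(A,ll,rr);CHKERRQ(ierr);
}
#endif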

#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIAIJ"
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat        a,b,c,d;
  PetscTruth flg;
  int        ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
       and then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
int MatSetUpPreallocation_MPIAIJ(Mat A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
{
  int        ierr,one = 1,i;
  Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    x = (Mat_SeqAIJ*)xx->A->data;
    y = (Mat_SeqAIJ*)yy->A->data;
    BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
    x = (Mat_SeqAIJ*)xx->B->data;
    y = (Mat_SeqAIJ*)yy->B->data;
    BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ*)xx->B->data;
    y = (Mat_SeqAIJ*)yy->B->data;
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
    }
    for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
  } else {
    ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
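
/*
   Usage sketch (illustrative only): Y = a*X + Y.  The scalar is passed by
   address in this PETSc vintage, matching the MatAXPY_MPIAIJ() signature
   above; the fast path requires X and Y to share a nonzero pattern.
*/
#if 0
{
  Mat         X,Y;    /* assumed to have identical parallel layouts */
  PetscScalar a = 2.0;
  int         ierr;
  ierr = MatAXPY(&a,X,Y,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
}
#endif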

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
/* 4*/ MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
/*15*/ MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
/*20*/ MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
/*25*/ MatZeroRows_MPIAIJ,
#if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
       MatLUFactorSymbolic_MPIAIJ_TFS,
#else
       0,
#endif
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIAIJ,
       0,
       0,
       0,
       0,
/*35*/ MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
/*40*/ MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
/*45*/ MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
/*50*/ MatGetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
/*55*/ MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       MatGetPetscMaps_Petsc,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       MatSetColoring_MPIAIJ,
       MatSetValuesAdic_MPIAIJ,
       MatSetValuesAdifor_MPIAIJ,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
       0,
/*85*/ MatLoad_MPIAIJ};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
int MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
int MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#include "petscpc.h"
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
{
  Mat_MPIAIJ *b;
  int        ierr,i;

  PetscFunctionBegin;
  B->preallocated = PETSC_TRUE;
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<B->m; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->m; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  /* note the off-diagonal block b->B is created with the full global column count B->N;
     after assembly its columns are compressed down to the ones actually used */
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->A);
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->B);

  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
.  -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJ()
M*/
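
/*
   Illustrative usage sketch (not part of the original source): selecting this
   type at runtime with the option listed above; the global sizes M and N are
   placeholders.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);

   and run the program with -mat_type mpiaij.
*/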

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
int MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  int        ierr,i,size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor    = 0;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);

  /* the information in the maps duplicates the information computed below; eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C",
                                     "MatIsSymmetric_MPIAIJ",
                                     MatIsSymmetric_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                     "MatMPIAIJSetPreallocation_MPIAIJ",
                                     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIAIJ",
                                     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  int        ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    int len;
    len  = oldmat->B->n;
    ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,len*sizeof(int));
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->B);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
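
/*
   Illustrative usage sketch (not part of the original source): duplicating an
   assembled MPIAIJ matrix, nonzero structure and values included, through the
   generic interface that dispatches to MatDuplicate_MPIAIJ() above.

     Mat Acopy;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&Acopy);CHKERRQ(ierr);
*/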

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  PetscScalar  *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          i,nz,ierr,j,rstart,rend,fd;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);

  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}
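
/*
   Illustrative usage sketch (not part of the original source): loading an MPIAIJ
   matrix from a PETSc binary file.  The file name and the binary-open mode flag
   are assumptions for the example.

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_BINARY_RDONLY,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/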

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ on each
  process, and then the end result built by concatenating the local matrices.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int         ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  int         *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat         *local,M,Mreuse;
  PetscScalar *vwork,*aa;
  MPI_Comm    comm = mat->comm;
  Mat_SeqAIJ  *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
    }

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
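
/*
   Illustrative usage sketch (not part of the original source): extracting a
   parallel submatrix through the generic MatGetSubMatrix() interface, which
   dispatches to the routine above.  The index-set construction is an assumption
   for the example (the first 10 locally owned rows/columns, by stride, with
   rstart the start of this process's ownership range).

     IS  isrow,iscol;
     Mat sub;
     ierr = ISCreateStride(PETSC_COMM_WORLD,10,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,10,rstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
*/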

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates storage for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion of
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2; i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
{
  int ierr,(*f)(Mat,int,const int[],int,const int[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
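
/*
   Illustrative usage sketch (not part of the original source): the typical call
   sequence for preallocating an MPIAIJ matrix, using constant per-row counts
   taken from the man page example above.  The local sizes shown are proc0's;
   each process passes its own local sizes.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,3,3,8,8,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,2,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/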

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is
       given). This value should be the same as the local size used in creating
       the x vector for the matrix-vector product y = Ax. For square matrices
       n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   The m,n,M,N parameters specify the size of the matrix and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion of
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
   MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2; i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
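
/*
   Illustrative usage sketch (not part of the original source): creating the 8x8
   example matrix from the man page above on 3 processes.  The arrays shown are
   proc0's per-row counts derived there; each process passes its own local sizes
   and counts.

     int d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
     Mat A;
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
*/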

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  int        ierr,i;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_LOCAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    int             *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      larray[i] = i + a->cstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@C
   MatMerge - Creates a single large PETSc matrix by concatenating sequential
              matrices from each processor

   Collective on MPI_Comm

   Input Parameters:
+  comm - the communicator that the parallel matrix will live on
-  inmat - the input sequential matrix on each processor

   Output Parameter:
.  outmat - the parallel matrix generated

   Level: advanced

   Notes: The number of columns of the matrix in EACH of the separate input
   matrices MUST be the same.

@*/
int MatMerge(MPI_Comm comm,Mat inmat,Mat *outmat)
{
  int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
  PetscScalar *values;
  PetscMap    columnmap,rowmap;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);

  /* count nonzeros in each row, for the diagonal and off-diagonal portions of the matrix */
  ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
  ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
  ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
  ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
  ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);

  ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
  ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
  ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
  ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
  ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
    ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    I    = i + rstart;
    ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatDestroy(inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
int MatFileSplit(Mat A,char *outfile)
{
  int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
  PetscViewer out;
  char        *name;
  Mat         B;
  PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
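
/*
   Illustrative usage sketch (not part of the original source): each process
   assembles its own sequential rows and MatMerge() concatenates them into one
   parallel matrix.  Note that MatMerge() destroys the input matrix, so it must
   not be used afterwards; mlocal and N here are placeholders.

     Mat seq,par;
     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,N,0,PETSC_NULL,&seq);CHKERRQ(ierr);
     ... insert values into seq and assemble it ...
     ierr = MatMerge(PETSC_COMM_WORLD,seq,&par);CHKERRQ(ierr);
*/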