/*$Id: mpiaij.c,v 1.322 2000/10/24 20:25:36 bsmith Exp bsmith $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
EXTERN int DisAssemble_MPIAIJ(Mat);
EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
EXTERN int MatPrintHelp_SeqAIJ(Mat);

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is used this is scalable,
   at a slightly higher hash table cost; without it, it is not scalable
   (each processor has an order-N integer array) but is fast to access.
*/
#undef __FUNC__
#define __FUNC__ /*<a name="CreateColmap_MPIAIJ_Private"></a>*/"CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  aij->colmap = (int*)PetscMalloc((mat->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15

/*
   Inserts (or adds) one value into the given row of the "diagonal" (A) block:
   a short binary search followed by a linear scan locates the column; if the
   entry is new and the row has no free slot, the row storage is enlarged by
   CHUNKSIZE (reallocating the whole local matrix).
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(Scalar))+(am+1)*sizeof(int); \
      new_a = (Scalar*)PetscMalloc(len);CHKPTRQ(new_a); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(Scalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr);
\ 93 if (!a->singlemalloc) { \ 94 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 95 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 96 } \ 97 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 98 a->singlemalloc = PETSC_TRUE; \ 99 \ 100 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 101 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 102 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 103 a->maxnz += CHUNKSIZE; \ 104 a->reallocs++; \ 105 } \ 106 N = nrow++ - 1; a->nz++; \ 107 /* shift up all the later entries in this row */ \ 108 for (ii=N; ii>=_i; ii--) { \ 109 rp[ii+1] = rp[ii]; \ 110 ap[ii+1] = ap[ii]; \ 111 } \ 112 rp[_i] = col1; \ 113 ap[_i] = value; \ 114 a_noinsert: ; \ 115 ailen[row] = nrow; \ 116 } 117 118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 119 { \ 120 \ 121 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 122 rmax = bimax[row]; nrow = bilen[row]; \ 123 col1 = col - shift; \ 124 \ 125 low = 0; high = nrow; \ 126 while (high-low > 5) { \ 127 t = (low+high)/2; \ 128 if (rp[t] > col) high = t; \ 129 else low = t; \ 130 } \ 131 for (_i=low; _i<high; _i++) { \ 132 if (rp[_i] > col1) break; \ 133 if (rp[_i] == col1) { \ 134 if (addv == ADD_VALUES) ap[_i] += value; \ 135 else ap[_i] = value; \ 136 goto b_noinsert; \ 137 } \ 138 } \ 139 if (nonew == 1) goto b_noinsert; \ 140 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 141 if (nrow >= rmax) { \ 142 /* there is no extra room in row, therefore enlarge */ \ 143 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 144 Scalar *new_a; \ 145 \ 146 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 147 \ 148 /* malloc new storage space */ \ 149 len = new_nz*(sizeof(int)+sizeof(Scalar))+(bm+1)*sizeof(int); \ 150 new_a = (Scalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 151 new_j = (int*)(new_a + new_nz); \ 152 new_i = new_j + new_nz; \ 153 \ 154 /* copy over old data into new slots */ \ 155 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 156 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 157 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 158 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 159 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 160 len*sizeof(int));CHKERRQ(ierr); \ 161 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 163 len*sizeof(Scalar));CHKERRQ(ierr); \ 164 /* free up old matrix storage */ \ 165 \ 166 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 167 if (!b->singlemalloc) { \ 168 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 169 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 170 } \ 171 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 172 b->singlemalloc = PETSC_TRUE; \ 173 \ 174 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 175 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 176 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 177 b->maxnz += CHUNKSIZE; \ 178 b->reallocs++; \ 179 } \ 180 N = nrow++ - 1; b->nz++; \ 181 /* shift up all the later entries in this row */ \ 182 for (ii=N; ii>=_i; ii--) { \ 183 rp[ii+1] = rp[ii]; \ 184 ap[ii+1] = ap[ii]; \ 185 } \ 186 rp[_i] = col1; \ 187 ap[_i] = value; \ 188 b_noinsert: ; \ 189 bilen[row] = nrow; \ 190 } 191 192 #undef __FUNC__ 193 #define __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIAIJ" 194 int MatSetValues_MPIAIJ(Mat mat,int m,int 
*im,int n,int *in,Scalar *v,InsertMode addv) 195 { 196 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 197 Scalar value; 198 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 199 int cstart = aij->cstart,cend = aij->cend,row,col; 200 PetscTruth roworiented = aij->roworiented; 201 202 /* Some Variables required in the macro */ 203 Mat A = aij->A; 204 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 205 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 206 Scalar *aa = a->a; 207 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 208 Mat B = aij->B; 209 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 210 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 211 Scalar *ba = b->a; 212 213 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 214 int nonew = a->nonew,shift = a->indexshift; 215 Scalar *ap; 216 217 PetscFunctionBegin; 218 for (i=0; i<m; i++) { 219 if (im[i] < 0) continue; 220 #if defined(PETSC_USE_BOPT_g) 221 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 222 #endif 223 if (im[i] >= rstart && im[i] < rend) { 224 row = im[i] - rstart; 225 for (j=0; j<n; j++) { 226 if (in[j] >= cstart && in[j] < cend){ 227 col = in[j] - cstart; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 if (ignorezeroentries && value == 0.0) continue; 230 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } else if (in[j] < 0) continue; 233 #if defined(PETSC_USE_BOPT_g) 234 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");} 235 #endif 236 else { 237 if (mat->was_assembled) { 238 if (!aij->colmap) { 239 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 240 } 241 #if defined (PETSC_USE_CTABLE) 242 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 243 col--; 244 #else 245 col = aij->colmap[in[j]] - 1; 246 #endif 247 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 248 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 249 col = in[j]; 250 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 251 B = aij->B; 252 b = (Mat_SeqAIJ*)B->data; 253 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 254 ba = b->a; 255 } 256 } else col = in[j]; 257 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 258 if (ignorezeroentries && value == 0.0) continue; 259 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 260 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 261 } 262 } 263 } else { 264 if (!aij->donotstash) { 265 if (roworiented) { 266 if (ignorezeroentries && v[i*n] == 0.0) continue; 267 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 268 } else { 269 if (ignorezeroentries && v[i] == 0.0) continue; 270 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 271 } 272 } 273 } 274 } 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNC__ 279 #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIAIJ" 280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 281 { 282 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 283 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 284 int cstart = aij->cstart,cend = aij->cend,row,col; 285 286 PetscFunctionBegin; 287 for (i=0; i<m; i++) { 288 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 289 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 290 if (idxm[i] >= rstart && 
idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  int         ierr,nstash,reallocs;
  InsertMode  addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}


#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so, we must
     also disassemble ourselves, in order that we may reassemble.
*/ 382 /* 383 if nonzero structure of submatrix B cannot change then we know that 384 no processor disassembled thus we can skip this stuff 385 */ 386 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 387 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 388 if (mat->was_assembled && !other_disassembled) { 389 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 390 } 391 } 392 393 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 394 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 395 } 396 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 397 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 398 399 if (aij->rowvalues) { 400 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 401 aij->rowvalues = 0; 402 } 403 PetscFunctionReturn(0); 404 } 405 406 #undef __FUNC__ 407 #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIAIJ" 408 int MatZeroEntries_MPIAIJ(Mat A) 409 { 410 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 411 int ierr; 412 413 PetscFunctionBegin; 414 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 415 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 416 PetscFunctionReturn(0); 417 } 418 419 #undef __FUNC__ 420 #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIAIJ" 421 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 422 { 423 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 424 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 425 int *procs,*nprocs,j,idx,nsends,*work,row; 426 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 427 int *rvalues,tag = A->tag,count,base,slen,n,*source; 428 int *lens,imdex,*lrows,*values,rstart=l->rstart; 429 MPI_Comm comm = A->comm; 430 MPI_Request *send_waits,*recv_waits; 431 MPI_Status recv_status,*send_status; 432 IS istmp; 433 PetscTruth found; 434 435 PetscFunctionBegin; 436 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 437 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 438 439 /* first count number of contributors to each processor */ 440 nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs); 441 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 442 procs = nprocs + size; 443 owner = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 444 for (i=0; i<N; i++) { 445 idx = rows[i]; 446 found = PETSC_FALSE; 447 for (j=0; j<size; j++) { 448 if (idx >= owners[j] && idx < owners[j+1]) { 449 nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break; 450 } 451 } 452 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 453 } 454 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 455 456 /* inform other processors of number of messages and max length*/ 457 work = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work); 458 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 459 nrecvs = work[size+rank]; 460 nmax = work[rank]; 461 ierr = PetscFree(work);CHKERRQ(ierr); 462 463 /* post receives: */ 464 rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 465 recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 466 for (i=0; i<nrecvs; i++) { 467 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 468 } 469 470 /* do sends: 471 1) starts[i] gives the starting index in svalues for stuff going to 472 the ith processor 473 */ 474 svalues = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues); 475 send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 476 starts = 
(int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory, call lots of mallocs, and slow things down.
534 535 Contributed by: Mathew Knepley 536 */ 537 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 538 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 539 if (diag && (l->A->M == l->A->N)) { 540 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 541 } else if (diag) { 542 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 543 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 544 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 545 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 546 } 547 for (i = 0; i < slen; i++) { 548 row = lrows[i] + rstart; 549 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 550 } 551 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 552 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 553 } else { 554 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 555 } 556 ierr = ISDestroy(istmp);CHKERRQ(ierr); 557 ierr = PetscFree(lrows);CHKERRQ(ierr); 558 559 /* wait on sends */ 560 if (nsends) { 561 send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 562 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 563 ierr = PetscFree(send_status);CHKERRQ(ierr); 564 } 565 ierr = PetscFree(send_waits);CHKERRQ(ierr); 566 ierr = PetscFree(svalues);CHKERRQ(ierr); 567 568 PetscFunctionReturn(0); 569 } 570 571 #undef __FUNC__ 572 #define __FUNC__ /*<a name=""></a>*/"MatMult_MPIAIJ" 573 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 574 { 575 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 576 int ierr,nt; 577 578 PetscFunctionBegin; 579 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 580 if (nt != A->n) { 581 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 582 } 583 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 584 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 585 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 586 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 587 PetscFunctionReturn(0); 588 } 589 590 #undef __FUNC__ 591 #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIAIJ" 592 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 593 { 594 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 595 int ierr; 596 597 PetscFunctionBegin; 598 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 599 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 600 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 601 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 602 PetscFunctionReturn(0); 603 } 604 605 #undef __FUNC__ 606 #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIAIJ" 607 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 608 { 609 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 610 int ierr; 611 612 PetscFunctionBegin; 613 /* do nondiagonal part */ 614 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 615 /* send it on its way */ 616 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 617 /* do local part */ 618 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 619 /* receive remote parts: note this assumes the values are not actually */ 620 /* inserted in yy until the next line, which is true for my implementation*/ 621 /* but is not perhaps always true. 
*/ 622 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 623 PetscFunctionReturn(0); 624 } 625 626 #undef __FUNC__ 627 #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIAIJ" 628 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 629 { 630 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 631 int ierr; 632 633 PetscFunctionBegin; 634 /* do nondiagonal part */ 635 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 636 /* send it on its way */ 637 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 638 /* do local part */ 639 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 640 /* receive remote parts: note this assumes the values are not actually */ 641 /* inserted in yy until the next line, which is true for my implementation*/ 642 /* but is not perhaps always true. */ 643 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 644 PetscFunctionReturn(0); 645 } 646 647 /* 648 This only works correctly for square matrices where the subblock A->A is the 649 diagonal block 650 */ 651 #undef __FUNC__ 652 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIAIJ" 653 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 654 { 655 int ierr; 656 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 657 658 PetscFunctionBegin; 659 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 660 if (a->rstart != a->cstart || a->rend != a->cend) { 661 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 662 } 663 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 664 PetscFunctionReturn(0); 665 } 666 667 #undef __FUNC__ 668 #define __FUNC__ /*<a name=""></a>*/"MatScale_MPIAIJ" 669 int MatScale_MPIAIJ(Scalar *aa,Mat A) 670 { 671 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 672 int ierr; 673 674 PetscFunctionBegin; 675 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 676 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 677 PetscFunctionReturn(0); 678 } 679 680 #undef __FUNC__ 681 #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIAIJ" 682 int MatDestroy_MPIAIJ(Mat mat) 683 { 684 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 685 int ierr; 686 687 PetscFunctionBegin; 688 #if defined(PETSC_USE_LOG) 689 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 690 #endif 691 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 692 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 693 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 694 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 695 #if defined (PETSC_USE_CTABLE) 696 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 697 #else 698 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 699 #endif 700 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 701 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 702 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 703 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 704 ierr = PetscFree(aij);CHKERRQ(ierr); 705 PetscFunctionReturn(0); 706 } 707 708 #undef __FUNC__ 709 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIAIJ_ASCIIorDraworSocket" 710 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 711 { 712 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 713 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 714 int ierr,format,shift = C->indexshift,rank = aij->rank,size = aij->size; 715 PetscTruth isdraw,isascii,flg; 716 Viewer sviewer; 717 718 PetscFunctionBegin; 719 ierr = 
PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 720 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 721 if (isascii) { 722 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 723 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 724 MatInfo info; 725 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 726 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 727 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 728 if (flg) { 729 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 730 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 731 } else { 732 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 733 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 734 } 735 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 736 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 737 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 738 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 739 ierr = ViewerFlush(viewer);CHKERRQ(ierr); 740 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 741 PetscFunctionReturn(0); 742 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 743 PetscFunctionReturn(0); 744 } 745 } else if (isdraw) { 746 Draw draw; 747 PetscTruth isnull; 748 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 749 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 750 } 751 752 if (size == 1) { 753 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 754 } else { 755 /* assemble the entire matrix onto first processor. 
    */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    int        M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    Scalar     *a;

    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    ct = cols = (int*)PetscMalloc((ai[m]+1)*sizeof(int));CHKPTRQ(cols);
    for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,Viewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
  if (isascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

/*
    This has to provide several versions.

    2) a) use only local smoothing, updating outer values only once.
       b) local smoothing, updating outer values each inner iteration
    3) color updating outer values between colors.
833 */ 834 #undef __FUNC__ 835 #define __FUNC__ /*<a name=""></a>*/"MatRelax_MPIAIJ" 836 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,Vec xx) 837 { 838 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 839 Mat AA = mat->A,BB = mat->B; 840 Mat_SeqAIJ *A = (Mat_SeqAIJ*)AA->data,*B = (Mat_SeqAIJ *)BB->data; 841 Scalar *b,*x,*xs,*ls,d,*v,sum; 842 int ierr,*idx,*diag; 843 int n = matin->n,m = matin->m,i,shift = A->indexshift; 844 845 PetscFunctionBegin; 846 if (!A->diag) {ierr = MatMarkDiagonal_SeqAIJ(AA);CHKERRQ(ierr);} 847 diag = A->diag; 848 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 849 if (flag & SOR_ZERO_INITIAL_GUESS) { 850 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 851 PetscFunctionReturn(0); 852 } 853 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 854 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 855 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 856 if (xx != bb) { 857 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 858 } else { 859 b = x; 860 } 861 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 862 xs = x + shift; /* shift by one for index start of 1 */ 863 ls = ls + shift; 864 while (its--) { 865 /* go down through the rows */ 866 for (i=0; i<m; i++) { 867 n = A->i[i+1] - A->i[i]; 868 PLogFlops(4*n+3); 869 idx = A->j + A->i[i] + shift; 870 v = A->a + A->i[i] + shift; 871 sum = b[i]; 872 SPARSEDENSEMDOT(sum,xs,v,idx,n); 873 d = fshift + A->a[diag[i]+shift]; 874 n = B->i[i+1] - B->i[i]; 875 idx = B->j + B->i[i] + shift; 876 v = B->a + B->i[i] + shift; 877 SPARSEDENSEMDOT(sum,ls,v,idx,n); 878 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 879 } 880 /* come up through the rows */ 881 for (i=m-1; i>-1; i--) { 882 n = A->i[i+1] - A->i[i]; 883 PLogFlops(4*n+3); 884 idx = A->j + A->i[i] + shift; 885 v = A->a + A->i[i] + shift; 886 sum = b[i]; 887 SPARSEDENSEMDOT(sum,xs,v,idx,n); 888 d = fshift + A->a[diag[i]+shift]; 889 n = B->i[i+1] - B->i[i]; 890 idx = B->j + B->i[i] + shift; 891 v = B->a + B->i[i] + shift; 892 SPARSEDENSEMDOT(sum,ls,v,idx,n); 893 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 894 } 895 } 896 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 897 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 898 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 899 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 900 if (flag & SOR_ZERO_INITIAL_GUESS) { 901 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 902 PetscFunctionReturn(0); 903 } 904 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 905 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 906 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 907 if (xx != bb) { 908 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 909 } else { 910 b = x; 911 } 912 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 913 xs = x + shift; /* shift by one for index start of 1 */ 914 ls = ls + shift; 915 while (its--) { 916 for (i=0; i<m; i++) { 917 n = A->i[i+1] - A->i[i]; 918 PLogFlops(4*n+3); 919 idx = A->j + A->i[i] + shift; 920 v = A->a + A->i[i] + shift; 921 sum = b[i]; 922 SPARSEDENSEMDOT(sum,xs,v,idx,n); 923 d = fshift + A->a[diag[i]+shift]; 924 n = B->i[i+1] - B->i[i]; 925 idx = B->j + B->i[i] + shift; 926 v = B->a + B->i[i] + shift; 927 SPARSEDENSEMDOT(sum,ls,v,idx,n); 928 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 929 } 930 } 931 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 932 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 933 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 934 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 935 if (flag & SOR_ZERO_INITIAL_GUESS) { 936 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 937 PetscFunctionReturn(0); 938 } 939 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 940 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 941 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 942 if (xx != bb) { 943 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 944 } else { 945 b = x; 946 } 947 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 948 xs = x + shift; /* shift by one for index start of 1 */ 949 ls = ls + shift; 950 while (its--) { 951 for (i=m-1; i>-1; i--) { 952 n = A->i[i+1] - A->i[i]; 953 PLogFlops(4*n+3); 954 idx = A->j + A->i[i] + shift; 955 v = A->a + A->i[i] + shift; 956 sum = b[i]; 957 SPARSEDENSEMDOT(sum,xs,v,idx,n); 958 d = fshift + A->a[diag[i]+shift]; 959 n = B->i[i+1] - B->i[i]; 960 idx = B->j + B->i[i] + shift; 961 v = B->a + B->i[i] + shift; 962 SPARSEDENSEMDOT(sum,ls,v,idx,n); 963 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 964 } 965 } 966 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 967 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 968 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 969 } else { 970 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 971 } 972 PetscFunctionReturn(0); 973 } 974 975 #undef __FUNC__ 976 #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIAIJ" 977 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 978 { 979 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 980 Mat A = mat->A,B = mat->B; 981 int ierr; 982 PetscReal isend[5],irecv[5]; 983 984 PetscFunctionBegin; 985 info->block_size = 1.0; 986 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 987 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 988 isend[3] = info->memory; isend[4] = info->mallocs; 989 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 990 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 991 isend[3] += info->memory; isend[4] += info->mallocs; 992 if (flag == MAT_LOCAL) { 993 info->nz_used = isend[0]; 994 info->nz_allocated = isend[1]; 995 info->nz_unneeded = isend[2]; 996 info->memory = isend[3]; 997 info->mallocs = isend[4]; 998 } else if (flag == MAT_GLOBAL_MAX) { 999 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1000 info->nz_used = irecv[0]; 1001 info->nz_allocated = irecv[1]; 1002 info->nz_unneeded = irecv[2]; 1003 info->memory = irecv[3]; 1004 info->mallocs = irecv[4]; 1005 } else if (flag == MAT_GLOBAL_SUM) { 1006 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1007 info->nz_used = irecv[0]; 1008 info->nz_allocated = irecv[1]; 1009 info->nz_unneeded = irecv[2]; 1010 info->memory = irecv[3]; 1011 info->mallocs = irecv[4]; 1012 } 1013 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1014 info->fill_ratio_needed = 0; 1015 info->factor_mallocs = 0; 1016 info->rows_global = (double)matin->M; 1017 info->columns_global = (double)matin->N; 1018 info->rows_local = (double)matin->m; 1019 info->columns_local = (double)matin->N; 1020 1021 PetscFunctionReturn(0); 1022 } 1023 1024 
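/*
   MatSetOption_MPIAIJ - Most options are simply forwarded to both the "diagonal"
   (A) and "off-diagonal" (B) sequential blocks; the row/column-orientation
   options additionally update the local roworiented flag, and
   MAT_IGNORE_OFF_PROC_ENTRIES only sets the donotstash flag on this process.
*/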
#undef __FUNC__ 1025 #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIAIJ" 1026 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1027 { 1028 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1029 int ierr; 1030 1031 PetscFunctionBegin; 1032 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1033 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1034 op == MAT_COLUMNS_UNSORTED || 1035 op == MAT_COLUMNS_SORTED || 1036 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1037 op == MAT_KEEP_ZEROED_ROWS || 1038 op == MAT_NEW_NONZERO_LOCATION_ERR || 1039 op == MAT_USE_INODES || 1040 op == MAT_DO_NOT_USE_INODES || 1041 op == MAT_IGNORE_ZERO_ENTRIES) { 1042 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1043 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1044 } else if (op == MAT_ROW_ORIENTED) { 1045 a->roworiented = PETSC_TRUE; 1046 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1047 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1048 } else if (op == MAT_ROWS_SORTED || 1049 op == MAT_ROWS_UNSORTED || 1050 op == MAT_SYMMETRIC || 1051 op == MAT_STRUCTURALLY_SYMMETRIC || 1052 op == MAT_YES_NEW_DIAGONALS) { 1053 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1054 } else if (op == MAT_COLUMN_ORIENTED) { 1055 a->roworiented = PETSC_FALSE; 1056 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1057 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1058 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1059 a->donotstash = PETSC_TRUE; 1060 } else if (op == MAT_NO_NEW_DIAGONALS){ 1061 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1062 } else { 1063 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1064 } 1065 PetscFunctionReturn(0); 1066 } 1067 1068 #undef __FUNC__ 1069 #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIAIJ" 1070 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1071 { 1072 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1073 1074 PetscFunctionBegin; 1075 if (m) *m = mat->rstart; 1076 if (n) *n = mat->rend; 1077 PetscFunctionReturn(0); 1078 } 1079 1080 #undef __FUNC__ 1081 #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIAIJ" 1082 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1083 { 1084 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1085 Scalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1086 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1087 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1088 int *cmap,*idx_p; 1089 1090 PetscFunctionBegin; 1091 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1092 mat->getrowactive = PETSC_TRUE; 1093 1094 if (!mat->rowvalues && (idx || v)) { 1095 /* 1096 allocate enough space to hold information from the longest row. 
1097 */ 1098 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1099 int max = 1,tmp; 1100 for (i=0; i<matin->m; i++) { 1101 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1102 if (max < tmp) { max = tmp; } 1103 } 1104 mat->rowvalues = (Scalar*)PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1105 mat->rowindices = (int*)(mat->rowvalues + max); 1106 } 1107 1108 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1109 lrow = row - rstart; 1110 1111 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1112 if (!v) {pvA = 0; pvB = 0;} 1113 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1114 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1115 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1116 nztot = nzA + nzB; 1117 1118 cmap = mat->garray; 1119 if (v || idx) { 1120 if (nztot) { 1121 /* Sort by increasing column numbers, assuming A and B already sorted */ 1122 int imark = -1; 1123 if (v) { 1124 *v = v_p = mat->rowvalues; 1125 for (i=0; i<nzB; i++) { 1126 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1127 else break; 1128 } 1129 imark = i; 1130 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1131 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1132 } 1133 if (idx) { 1134 *idx = idx_p = mat->rowindices; 1135 if (imark > -1) { 1136 for (i=0; i<imark; i++) { 1137 idx_p[i] = cmap[cworkB[i]]; 1138 } 1139 } else { 1140 for (i=0; i<nzB; i++) { 1141 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1142 else break; 1143 } 1144 imark = i; 1145 } 1146 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1147 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1148 } 1149 } else { 1150 if (idx) *idx = 0; 1151 if (v) *v = 0; 1152 } 1153 } 1154 *nz = nztot; 1155 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1156 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1157 PetscFunctionReturn(0); 1158 } 1159 1160 #undef __FUNC__ 1161 #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIAIJ" 1162 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1163 { 1164 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1165 1166 PetscFunctionBegin; 1167 if (aij->getrowactive == PETSC_FALSE) { 1168 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1169 } 1170 aij->getrowactive = PETSC_FALSE; 1171 PetscFunctionReturn(0); 1172 } 1173 1174 #undef __FUNC__ 1175 #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPIAIJ" 1176 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1177 { 1178 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1179 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1180 int ierr,i,j,cstart = aij->cstart,shift = amat->indexshift; 1181 PetscReal sum = 0.0; 1182 Scalar *v; 1183 1184 PetscFunctionBegin; 1185 if (aij->size == 1) { 1186 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1187 } else { 1188 if (type == NORM_FROBENIUS) { 1189 v = amat->a; 1190 for (i=0; i<amat->nz; i++) { 1191 #if defined(PETSC_USE_COMPLEX) 1192 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1193 #else 1194 sum += (*v)*(*v); v++; 1195 #endif 1196 } 1197 v = bmat->a; 1198 for (i=0; i<bmat->nz; i++) { 1199 #if defined(PETSC_USE_COMPLEX) 1200 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1201 #else 1202 sum += (*v)*(*v); v++; 1203 #endif 1204 } 1205 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1206 *norm = sqrt(*norm); 1207 } else if (type == NORM_1) { 
/* max column norm */ 1208 PetscReal *tmp,*tmp2; 1209 int *jj,*garray = aij->garray; 1210 tmp = (PetscReal*)PetscMalloc((mat->N+1)*sizeof(PetscReal));CHKPTRQ(tmp); 1211 tmp2 = (PetscReal*)PetscMalloc((mat->N+1)*sizeof(PetscReal));CHKPTRQ(tmp2); 1212 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1213 *norm = 0.0; 1214 v = amat->a; jj = amat->j; 1215 for (j=0; j<amat->nz; j++) { 1216 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1217 } 1218 v = bmat->a; jj = bmat->j; 1219 for (j=0; j<bmat->nz; j++) { 1220 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1221 } 1222 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1223 for (j=0; j<mat->N; j++) { 1224 if (tmp2[j] > *norm) *norm = tmp2[j]; 1225 } 1226 ierr = PetscFree(tmp);CHKERRQ(ierr); 1227 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1228 } else if (type == NORM_INFINITY) { /* max row norm */ 1229 PetscReal ntemp = 0.0; 1230 for (j=0; j<aij->A->m; j++) { 1231 v = amat->a + amat->i[j] + shift; 1232 sum = 0.0; 1233 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1234 sum += PetscAbsScalar(*v); v++; 1235 } 1236 v = bmat->a + bmat->i[j] + shift; 1237 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1238 sum += PetscAbsScalar(*v); v++; 1239 } 1240 if (sum > ntemp) ntemp = sum; 1241 } 1242 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1243 } else { 1244 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1245 } 1246 } 1247 PetscFunctionReturn(0); 1248 } 1249 1250 #undef __FUNC__ 1251 #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPIAIJ" 1252 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1253 { 1254 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1255 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1256 int ierr,shift = Aloc->indexshift; 1257 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1258 Mat B; 1259 Scalar *array; 1260 1261 PetscFunctionBegin; 1262 if (!matout && M != N) { 1263 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1264 } 1265 1266 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1267 1268 /* copy over the A part */ 1269 Aloc = (Mat_SeqAIJ*)a->A->data; 1270 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1271 row = a->rstart; 1272 for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;} 1273 for (i=0; i<m; i++) { 1274 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1275 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1276 } 1277 aj = Aloc->j; 1278 for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;} 1279 1280 /* copy over the B part */ 1281 Aloc = (Mat_SeqAIJ*)a->B->data; 1282 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1283 row = a->rstart; 1284 ct = cols = (int*)PetscMalloc((1+ai[m]-shift)*sizeof(int));CHKPTRQ(cols); 1285 for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];} 1286 for (i=0; i<m; i++) { 1287 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1288 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1289 } 1290 ierr = PetscFree(ct);CHKERRQ(ierr); 1291 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1292 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1293 if (matout) { 1294 *matout = B; 1295 } else { 1296 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1297 } 1298 PetscFunctionReturn(0); 1299 } 1300 1301 #undef __FUNC__ 1302 #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIAIJ" 1303 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1304 { 1305 
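  /*
     Left scaling (ll) touches only locally owned rows, so it is applied to both
     blocks immediately. Right scaling (rr) of the off-diagonal block needs the
     scattered copy of rr in lvec, so the scatter is begun first and its end is
     overlapped with the scaling of the diagonal block.
  */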
Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1306 Mat a = aij->A,b = aij->B; 1307 int ierr,s1,s2,s3; 1308 1309 PetscFunctionBegin; 1310 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1311 if (rr) { 1312 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1313 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1314 /* Overlap communication with computation. */ 1315 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1316 } 1317 if (ll) { 1318 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1319 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1320 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1321 } 1322 /* scale the diagonal block */ 1323 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1324 1325 if (rr) { 1326 /* Do a scatter end and then right scale the off-diagonal block */ 1327 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1328 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1329 } 1330 1331 PetscFunctionReturn(0); 1332 } 1333 1334 1335 #undef __FUNC__ 1336 #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPIAIJ" 1337 int MatPrintHelp_MPIAIJ(Mat A) 1338 { 1339 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1340 int ierr; 1341 1342 PetscFunctionBegin; 1343 if (!a->rank) { 1344 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1345 } 1346 PetscFunctionReturn(0); 1347 } 1348 1349 #undef __FUNC__ 1350 #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIAIJ" 1351 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1352 { 1353 PetscFunctionBegin; 1354 *bs = 1; 1355 PetscFunctionReturn(0); 1356 } 1357 #undef __FUNC__ 1358 #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPIAIJ" 1359 int MatSetUnfactored_MPIAIJ(Mat A) 1360 { 1361 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1362 int ierr; 1363 1364 PetscFunctionBegin; 1365 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1366 PetscFunctionReturn(0); 1367 } 1368 1369 #undef __FUNC__ 1370 #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPIAIJ" 1371 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1372 { 1373 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1374 Mat a,b,c,d; 1375 PetscTruth flg; 1376 int ierr; 1377 1378 PetscFunctionBegin; 1379 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1380 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1381 a = matA->A; b = matA->B; 1382 c = matB->A; d = matB->B; 1383 1384 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1385 if (flg == PETSC_TRUE) { 1386 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1387 } 1388 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1389 PetscFunctionReturn(0); 1390 } 1391 1392 #undef __FUNC__ 1393 #define __FUNC__ /*<a name=""></a>*/"MatCopy_MPIAIJ" 1394 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1395 { 1396 int ierr; 1397 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1398 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1399 PetscTruth flg; 1400 1401 PetscFunctionBegin; 1402 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1403 if (str != SAME_NONZERO_PATTERN || !flg) { 1404 /* because of the column compression in the off-processor part of the matrix a->B, 1405 the number of columns in a->B and b->B may be different, hence we cannot call 1406 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1407 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1408 then copying the submatrices */ 1409 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1410 } else { 1411 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1412 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1413 } 1414 PetscFunctionReturn(0); 1415 } 1416 1417 #undef __FUNC__ 1418 #define __FUNC__ /*<a name="MatSetUpPreallocation_MPIAIJ"></a>*/"MatSetUpPreallocation_MPIAIJ" 1419 int MatSetUpPreallocation_MPIAIJ(Mat A) 1420 { 1421 int ierr; 1422 1423 PetscFunctionBegin; 1424 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1425 PetscFunctionReturn(0); 1426 } 1427 1428 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1429 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1430 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1431 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **); 1432 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1433 1434 /* -------------------------------------------------------------------*/ 1435 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1436 MatGetRow_MPIAIJ, 1437 MatRestoreRow_MPIAIJ, 1438 MatMult_MPIAIJ, 1439 MatMultAdd_MPIAIJ, 1440 MatMultTranspose_MPIAIJ, 1441 MatMultTransposeAdd_MPIAIJ, 1442 0, 1443 0, 1444 0, 1445 0, 1446 0, 1447 0, 1448 MatRelax_MPIAIJ, 1449 MatTranspose_MPIAIJ, 1450 MatGetInfo_MPIAIJ, 1451 MatEqual_MPIAIJ, 1452 MatGetDiagonal_MPIAIJ, 1453 MatDiagonalScale_MPIAIJ, 1454 MatNorm_MPIAIJ, 1455 MatAssemblyBegin_MPIAIJ, 1456 MatAssemblyEnd_MPIAIJ, 1457 0, 1458 MatSetOption_MPIAIJ, 1459 MatZeroEntries_MPIAIJ, 1460 MatZeroRows_MPIAIJ, 1461 0, 1462 0, 1463 0, 1464 0, 1465 MatSetUpPreallocation_MPIAIJ, 1466 0, 1467 MatGetOwnershipRange_MPIAIJ, 1468 0, 1469 0, 1470 0, 1471 0, 1472 MatDuplicate_MPIAIJ, 1473 0, 1474 0, 1475 0, 1476 0, 1477 0, 1478 MatGetSubMatrices_MPIAIJ, 1479 MatIncreaseOverlap_MPIAIJ, 1480 MatGetValues_MPIAIJ, 1481 MatCopy_MPIAIJ, 1482 MatPrintHelp_MPIAIJ, 1483 MatScale_MPIAIJ, 1484 0, 1485 0, 1486 0, 1487 MatGetBlockSize_MPIAIJ, 1488 0, 1489 0, 1490 0, 1491 0, 1492 MatFDColoringCreate_MPIAIJ, 1493 0, 1494 MatSetUnfactored_MPIAIJ, 1495 0, 1496 0, 1497 MatGetSubMatrix_MPIAIJ, 1498 MatDestroy_MPIAIJ, 1499 MatView_MPIAIJ, 1500 MatGetMaps_Petsc}; 1501 1502 /* ----------------------------------------------------------------------------------------*/ 1503 1504 EXTERN_C_BEGIN 1505 #undef __FUNC__ 1506 #define __FUNC__ /*<a name=""></a>*/"MatStoreValues_MPIAIJ" 1507 int MatStoreValues_MPIAIJ(Mat mat) 1508 { 1509 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1510 int ierr; 1511 1512 PetscFunctionBegin; 1513 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1514 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1515 PetscFunctionReturn(0); 1516 } 1517 EXTERN_C_END 1518 1519 EXTERN_C_BEGIN 1520 #undef __FUNC__ 1521 #define __FUNC__ /*<a name=""></a>*/"MatRetrieveValues_MPIAIJ" 1522 int MatRetrieveValues_MPIAIJ(Mat mat) 1523 { 1524 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1525 int ierr; 1526 1527 PetscFunctionBegin; 1528 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1529 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1530 PetscFunctionReturn(0); 1531 } 1532 EXTERN_C_END 1533 1534 #include "petscpc.h" 1535 EXTERN_C_BEGIN 1536 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1537 EXTERN_C_END 1538 1539 EXTERN_C_BEGIN 1540 #undef __FUNC__ 1541 #define __FUNC__ /*<a name=""></a>*/"MatCreate_MPIAIJ" 
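/*
   MatCreate_MPIAIJ - Basic creation routine for the parallel AIJ format: it
   installs the function table, splits the row and column ownership across the
   communicator (gathering every process's range into rowners/cowners), creates
   the stash used to cache entries destined for other processes, and composes
   the MatStoreValues/MatRetrieveValues/MatGetDiagonalBlock operations on the object.
*/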
1542 int MatCreate_MPIAIJ(Mat B) 1543 { 1544 Mat_MPIAIJ *b; 1545 int ierr,i,size; 1546 1547 PetscFunctionBegin; 1548 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1549 1550 B->data = (void*)(b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b); 1551 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1552 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1553 B->factor = 0; 1554 B->assembled = PETSC_FALSE; 1555 B->mapping = 0; 1556 1557 B->insertmode = NOT_SET_VALUES; 1558 b->size = size; 1559 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1560 1561 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1562 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1563 1564 /* the information in the maps duplicates the information computed below, eventually 1565 we should remove the duplicate information that is not contained in the maps */ 1566 ierr = MapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1567 ierr = MapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1568 1569 /* build local table of row and column ownerships */ 1570 b->rowners = (int*)PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 1571 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1572 b->cowners = b->rowners + b->size + 2; 1573 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1574 b->rowners[0] = 0; 1575 for (i=2; i<=b->size; i++) { 1576 b->rowners[i] += b->rowners[i-1]; 1577 } 1578 b->rstart = b->rowners[b->rank]; 1579 b->rend = b->rowners[b->rank+1]; 1580 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1581 b->cowners[0] = 0; 1582 for (i=2; i<=b->size; i++) { 1583 b->cowners[i] += b->cowners[i-1]; 1584 } 1585 b->cstart = b->cowners[b->rank]; 1586 b->cend = b->cowners[b->rank+1]; 1587 1588 /* build cache for off array entries formed */ 1589 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1590 b->donotstash = PETSC_FALSE; 1591 b->colmap = 0; 1592 b->garray = 0; 1593 b->roworiented = PETSC_TRUE; 1594 1595 /* stuff used for matrix vector multiply */ 1596 b->lvec = PETSC_NULL; 1597 b->Mvctx = PETSC_NULL; 1598 1599 /* stuff for MatGetRow() */ 1600 b->rowindices = 0; 1601 b->rowvalues = 0; 1602 b->getrowactive = PETSC_FALSE; 1603 1604 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1605 "MatStoreValues_MPIAIJ", 1606 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1607 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1608 "MatRetrieveValues_MPIAIJ", 1609 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1610 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1611 "MatGetDiagonalBlock_MPIAIJ", 1612 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1613 PetscFunctionReturn(0); 1614 } 1615 EXTERN_C_END 1616 1617 #undef __FUNC__ 1618 #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIAIJ" 1619 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1620 { 1621 Mat mat; 1622 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1623 int ierr,len = 0; 1624 1625 PetscFunctionBegin; 1626 *newmat = 0; 1627 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1628 ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1629 a = (Mat_MPIAIJ*)mat->data; 1630 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1631 mat->factor = matin->factor; 1632 mat->assembled = PETSC_TRUE; 1633 mat->insertmode = NOT_SET_VALUES; 

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIAIJ"
int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  int        ierr,len = 0;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    a->colmap = (int*)PetscMalloc((mat->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(mat->N)*sizeof(int));
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    len = oldmat->B->n;
    a->garray = (int*)PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatLoad_MPIAIJ"
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        i,nz,ierr,j,rstart,rend,fd;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag,cend,cstart,n;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
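  /* for example, M = 8 rows on size = 3 processes gives m = 2 + 1 = 3 on
     ranks 0 and 1 (8/3 = 2 and 8 % 3 = 2 > rank) and m = 2 on rank 2, i.e.
     the 3,3,2 split used in the manual pages later in this file */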
  rowners = (int*)PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr    = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*)PetscMalloc(M*sizeof(int));CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
    ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    mycols = (int*)PetscMalloc((nz+1)*sizeof(int));CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* allocate buffer for numeric values */
    vals = (Scalar*)PetscMalloc((nz+1)*sizeof(Scalar));CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
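
/*
   Illustrative usage sketch for the loader above.  It assumes the binary
   Viewer interface of this PETSc version (ViewerBinaryOpen() with
   BINARY_RDONLY); the file name "matrix.dat" is only a placeholder.  A
   matrix written with MatView() on a binary viewer can be read back in
   parallel, with the rows split across processes exactly as computed above:

      Viewer viewer;
      Mat    A;
      int    ierr;

      ierr = ViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer);CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
      ierr = ViewerDestroy(viewer);CHKERRQ(ierr);
*/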

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ on each
  process, and then the final result formed by concatenating the local matrices.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int        ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  int        *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
  Mat        *local,M,Mreuse;
  Scalar     *vwork,*aa;
  MPI_Comm   comm = mat->comm;
  Mat_SeqAIJ *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of nonzeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths */
    dlens = (int*)PetscMalloc((2*m+1)*sizeof(int));CHKPTRQ(dlens);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
      rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
  ii = aij->i;
  jj = aij->j;
  aa = aij->a;
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
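
/*
   Illustrative usage sketch for the routine above (the index sets and the
   variables nloc and first are placeholders, not defined in this file).  The
   first call with MAT_INITIAL_MATRIX builds the submatrix and composes the
   intermediate sequential matrix on it under the name "SubMatrix"; a later
   call with MAT_REUSE_MATRIX reuses that storage, provided the index sets
   describe the same layout:

      IS  isrow,iscol;
      Mat sub;
      int ierr,nloc,first;

      ierr = ISCreateStride(PETSC_COMM_WORLD,nloc,first,1,&isrow);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_WORLD,nloc,first,1,&iscol);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/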

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates storage for a sparse parallel matrix in AIJ
   format (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in the DIAGONAL portion of the local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of the local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number of local
           rows, i.e., 'm'.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion of
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 stores [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros
   over the local rows of the local DIAGONAL and OFF-DIAGONAL submatrices,
   respectively.  In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations on every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
{
  Mat_MPIAIJ *b;
  int        ierr,i;
  PetscTruth flg2;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
  if (!flg2) PetscFunctionReturn(0);
  B->preallocated = PETSC_TRUE;
  if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than -2: value %d",d_nz);
  if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than -2: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<B->m; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->m; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  PetscFunctionReturn(0);
}
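
/*
   Illustrative usage sketch for the preallocation routine above, using the
   8x8, 3-process example from the manual page (the numbers shown are the
   d_nnz/o_nnz values of proc1, which owns rows 3 through 5; in SPMD style
   each process passes its own local sizes and arrays):

      Mat A;
      int ierr;
      int d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};

      ierr = MatCreate(PETSC_COMM_WORLD,3,3,8,8,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);

   After this the usual MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd()
   sequence fills the matrix without any mallocs during assembly.
*/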

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.  For square matrices n is
       almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in the DIAGONAL portion of the local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of the local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number of local
           rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   The m,n,M,N parameters specify the size of the matrix and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion of
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 stores [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros
   over the local rows of the local DIAGONAL and OFF-DIAGONAL submatrices,
   respectively.  In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations on every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
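
/*
   Illustrative usage sketch for MatCreateMPIAIJ(), again with the 8x8,
   3-process example from the manual page above (the arrays shown are the
   d_nnz/o_nnz values of proc0; each process passes its own local sizes and
   arrays):

      Mat A;
      int ierr;
      int d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};

      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
         ... fill with MatSetValues() and assemble with MatAssemblyBegin()/MatAssemblyEnd() ...
      ierr = MatDestroy(A);CHKERRQ(ierr);

   On a single process the same call creates a MATSEQAIJ matrix instead, as
   the code above shows, so callers do not need a special uniprocessor case.
*/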