1 /*$Id: mpiaij.c,v 1.321 2000/09/28 21:11:06 bsmith Exp bsmith $*/ 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/spops.h" 6 7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat); 8 EXTERN int DisAssemble_MPIAIJ(Mat); 9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 12 EXTERN int MatPrintHelp_SeqAIJ(Mat); 13 14 /* 15 Local utility routine that creates a mapping from the global column 16 number to the local number in the off-diagonal part of the local 17 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 18 a slightly higher hash table cost; without it it is not scalable (each processor 19 has an order N integer array but is fast to acess. 20 */ 21 #undef __FUNC__ 22 #define __FUNC__ /*<a name="CreateColmap_MPIAIJ_Private"></a>*/"CreateColmap_MPIAIJ_Private" 23 int CreateColmap_MPIAIJ_Private(Mat mat) 24 { 25 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 26 int n = aij->B->n,i,ierr; 27 28 PetscFunctionBegin; 29 #if defined (PETSC_USE_CTABLE) 30 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 31 for (i=0; i<n; i++){ 32 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 33 } 34 #else 35 aij->colmap = (int*)PetscMalloc((mat->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 36 PLogObjectMemory(mat,mat->N*sizeof(int)); 37 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 38 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 39 #endif 40 PetscFunctionReturn(0); 41 } 42 43 #define CHUNKSIZE 15 44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 45 { \ 46 \ 47 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 48 rmax = aimax[row]; nrow = ailen[row]; \ 49 col1 = col - shift; \ 50 \ 51 low = 0; high = nrow; \ 52 while (high-low > 5) { \ 53 t = (low+high)/2; \ 54 if (rp[t] > col) high = t; \ 55 else low = t; \ 56 
} \ 57 for (_i=low; _i<high; _i++) { \ 58 if (rp[_i] > col1) break; \ 59 if (rp[_i] == col1) { \ 60 if (addv == ADD_VALUES) ap[_i] += value; \ 61 else ap[_i] = value; \ 62 goto a_noinsert; \ 63 } \ 64 } \ 65 if (nonew == 1) goto a_noinsert; \ 66 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 67 if (nrow >= rmax) { \ 68 /* there is no extra room in row, therefore enlarge */ \ 69 int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 70 Scalar *new_a; \ 71 \ 72 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 73 \ 74 /* malloc new storage space */ \ 75 len = new_nz*(sizeof(int)+sizeof(Scalar))+(am+1)*sizeof(int); \ 76 new_a = (Scalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 77 new_j = (int*)(new_a + new_nz); \ 78 new_i = new_j + new_nz; \ 79 \ 80 /* copy over old data into new slots */ \ 81 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 82 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 83 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 84 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 85 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 86 len*sizeof(int));CHKERRQ(ierr); \ 87 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \ 88 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 89 len*sizeof(Scalar));CHKERRQ(ierr); \ 90 /* free up old matrix storage */ \ 91 \ 92 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 93 if (!a->singlemalloc) { \ 94 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 95 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 96 } \ 97 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 98 a->singlemalloc = PETSC_TRUE; \ 99 \ 100 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 101 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 102 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 103 a->maxnz += CHUNKSIZE; \ 104 
a->reallocs++; \ 105 } \ 106 N = nrow++ - 1; a->nz++; \ 107 /* shift up all the later entries in this row */ \ 108 for (ii=N; ii>=_i; ii--) { \ 109 rp[ii+1] = rp[ii]; \ 110 ap[ii+1] = ap[ii]; \ 111 } \ 112 rp[_i] = col1; \ 113 ap[_i] = value; \ 114 a_noinsert: ; \ 115 ailen[row] = nrow; \ 116 } 117 118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 119 { \ 120 \ 121 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 122 rmax = bimax[row]; nrow = bilen[row]; \ 123 col1 = col - shift; \ 124 \ 125 low = 0; high = nrow; \ 126 while (high-low > 5) { \ 127 t = (low+high)/2; \ 128 if (rp[t] > col) high = t; \ 129 else low = t; \ 130 } \ 131 for (_i=low; _i<high; _i++) { \ 132 if (rp[_i] > col1) break; \ 133 if (rp[_i] == col1) { \ 134 if (addv == ADD_VALUES) ap[_i] += value; \ 135 else ap[_i] = value; \ 136 goto b_noinsert; \ 137 } \ 138 } \ 139 if (nonew == 1) goto b_noinsert; \ 140 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 141 if (nrow >= rmax) { \ 142 /* there is no extra room in row, therefore enlarge */ \ 143 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 144 Scalar *new_a; \ 145 \ 146 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 147 \ 148 /* malloc new storage space */ \ 149 len = new_nz*(sizeof(int)+sizeof(Scalar))+(bm+1)*sizeof(int); \ 150 new_a = (Scalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 151 new_j = (int*)(new_a + new_nz); \ 152 new_i = new_j + new_nz; \ 153 \ 154 /* copy over old data into new slots */ \ 155 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 156 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 157 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 158 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 159 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 160 len*sizeof(int));CHKERRQ(ierr); \ 161 ierr = 
PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 163 len*sizeof(Scalar));CHKERRQ(ierr); \ 164 /* free up old matrix storage */ \ 165 \ 166 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 167 if (!b->singlemalloc) { \ 168 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 169 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 170 } \ 171 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 172 b->singlemalloc = PETSC_TRUE; \ 173 \ 174 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 175 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 176 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 177 b->maxnz += CHUNKSIZE; \ 178 b->reallocs++; \ 179 } \ 180 N = nrow++ - 1; b->nz++; \ 181 /* shift up all the later entries in this row */ \ 182 for (ii=N; ii>=_i; ii--) { \ 183 rp[ii+1] = rp[ii]; \ 184 ap[ii+1] = ap[ii]; \ 185 } \ 186 rp[_i] = col1; \ 187 ap[_i] = value; \ 188 b_noinsert: ; \ 189 bilen[row] = nrow; \ 190 } 191 192 #undef __FUNC__ 193 #define __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIAIJ" 194 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 195 { 196 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 197 Scalar value; 198 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 199 int cstart = aij->cstart,cend = aij->cend,row,col; 200 PetscTruth roworiented = aij->roworiented; 201 202 /* Some Variables required in the macro */ 203 Mat A = aij->A; 204 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 205 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 206 Scalar *aa = a->a; 207 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 208 Mat B = aij->B; 209 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 210 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 211 Scalar *ba = b->a; 212 213 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 214 int nonew = a->nonew,shift 
= a->indexshift; 215 Scalar *ap; 216 217 PetscFunctionBegin; 218 for (i=0; i<m; i++) { 219 if (im[i] < 0) continue; 220 #if defined(PETSC_USE_BOPT_g) 221 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 222 #endif 223 if (im[i] >= rstart && im[i] < rend) { 224 row = im[i] - rstart; 225 for (j=0; j<n; j++) { 226 if (in[j] >= cstart && in[j] < cend){ 227 col = in[j] - cstart; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 if (ignorezeroentries && value == 0.0) continue; 230 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } else if (in[j] < 0) continue; 233 #if defined(PETSC_USE_BOPT_g) 234 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");} 235 #endif 236 else { 237 if (mat->was_assembled) { 238 if (!aij->colmap) { 239 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 240 } 241 #if defined (PETSC_USE_CTABLE) 242 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 243 col--; 244 #else 245 col = aij->colmap[in[j]] - 1; 246 #endif 247 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 248 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 249 col = in[j]; 250 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 251 B = aij->B; 252 b = (Mat_SeqAIJ*)B->data; 253 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 254 ba = b->a; 255 } 256 } else col = in[j]; 257 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 258 if (ignorezeroentries && value == 0.0) continue; 259 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 260 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 261 } 262 } 263 } else { 264 if (!aij->donotstash) { 265 if (roworiented) { 266 if (ignorezeroentries && v[i*n] == 0.0) continue; 267 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 268 } else { 269 if (ignorezeroentries && v[i] == 
0.0) continue; 270 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 271 } 272 } 273 } 274 } 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNC__ 279 #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIAIJ" 280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 281 { 282 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 283 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 284 int cstart = aij->cstart,cend = aij->cend,row,col; 285 286 PetscFunctionBegin; 287 for (i=0; i<m; i++) { 288 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 289 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 290 if (idxm[i] >= rstart && idxm[i] < rend) { 291 row = idxm[i] - rstart; 292 for (j=0; j<n; j++) { 293 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 294 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 295 if (idxn[j] >= cstart && idxn[j] < cend){ 296 col = idxn[j] - cstart; 297 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 298 } else { 299 if (!aij->colmap) { 300 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 301 } 302 #if defined (PETSC_USE_CTABLE) 303 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 304 col --; 305 #else 306 col = aij->colmap[idxn[j]] - 1; 307 #endif 308 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 309 else { 310 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 311 } 312 } 313 } 314 } else { 315 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 316 } 317 } 318 PetscFunctionReturn(0); 319 } 320 321 #undef __FUNC__ 322 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPIAIJ" 323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 324 { 325 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 326 int ierr,nstash,reallocs; 327 InsertMode addv; 328 329 PetscFunctionBegin; 330 if (aij->donotstash) { 331 PetscFunctionReturn(0); 332 } 
333 334 /* make sure all processors are either in INSERTMODE or ADDMODE */ 335 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 336 if (addv == (ADD_VALUES|INSERT_VALUES)) { 337 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 338 } 339 mat->insertmode = addv; /* in case this processor had no cache */ 340 341 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 342 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 343 PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 344 PetscFunctionReturn(0); 345 } 346 347 348 #undef __FUNC__ 349 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIAIJ" 350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 351 { 352 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 353 int i,j,rstart,ncols,n,ierr,flg; 354 int *row,*col,other_disassembled; 355 Scalar *val; 356 InsertMode addv = mat->insertmode; 357 358 PetscFunctionBegin; 359 if (!aij->donotstash) { 360 while (1) { 361 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 362 if (!flg) break; 363 364 for (i=0; i<n;) { 365 /* Now identify the consecutive vals belonging to the same row */ 366 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 367 if (j < n) ncols = j-i; 368 else ncols = n-i; 369 /* Now assemble all these values with a single function call */ 370 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 371 i = j; 372 } 373 } 374 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 375 } 376 377 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 378 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 379 380 /* determine if any processor has disassembled, if so we must 381 also disassemble ourselfs, in order that we may reassemble. 
*/ 382 /* 383 if nonzero structure of submatrix B cannot change then we know that 384 no processor disassembled thus we can skip this stuff 385 */ 386 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 387 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 388 if (mat->was_assembled && !other_disassembled) { 389 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 390 } 391 } 392 393 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 394 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 395 } 396 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 397 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 398 399 if (aij->rowvalues) { 400 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 401 aij->rowvalues = 0; 402 } 403 PetscFunctionReturn(0); 404 } 405 406 #undef __FUNC__ 407 #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIAIJ" 408 int MatZeroEntries_MPIAIJ(Mat A) 409 { 410 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 411 int ierr; 412 413 PetscFunctionBegin; 414 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 415 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 416 PetscFunctionReturn(0); 417 } 418 419 #undef __FUNC__ 420 #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIAIJ" 421 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 422 { 423 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 424 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 425 int *procs,*nprocs,j,found,idx,nsends,*work,row; 426 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 427 int *rvalues,tag = A->tag,count,base,slen,n,*source; 428 int *lens,imdex,*lrows,*values,rstart=l->rstart; 429 MPI_Comm comm = A->comm; 430 MPI_Request *send_waits,*recv_waits; 431 MPI_Status recv_status,*send_status; 432 IS istmp; 433 434 PetscFunctionBegin; 435 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 436 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 437 438 /* first count number of contributors to each processor */ 439 nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs); 
440 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 441 procs = nprocs + size; 442 owner = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 443 for (i=0; i<N; i++) { 444 idx = rows[i]; 445 found = 0; 446 for (j=0; j<size; j++) { 447 if (idx >= owners[j] && idx < owners[j+1]) { 448 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 449 } 450 } 451 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 452 } 453 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 454 455 /* inform other processors of number of messages and max length*/ 456 work = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work); 457 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 458 nrecvs = work[size+rank]; 459 nmax = work[rank]; 460 ierr = PetscFree(work);CHKERRQ(ierr); 461 462 /* post receives: */ 463 rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 464 recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 465 for (i=0; i<nrecvs; i++) { 466 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 467 } 468 469 /* do sends: 470 1) starts[i] gives the starting index in svalues for stuff going to 471 the ith processor 472 */ 473 svalues = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues); 474 send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 475 starts = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts); 476 starts[0] = 0; 477 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 478 for (i=0; i<N; i++) { 479 svalues[starts[owner[i]]++] = rows[i]; 480 } 481 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 482 483 starts[0] = 0; 484 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 485 count = 0; 486 for (i=0; i<size; i++) { 487 if (procs[i]) { 488 ierr = 
MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 489 } 490 } 491 ierr = PetscFree(starts);CHKERRQ(ierr); 492 493 base = owners[rank]; 494 495 /* wait on receives */ 496 lens = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens); 497 source = lens + nrecvs; 498 count = nrecvs; slen = 0; 499 while (count) { 500 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 501 /* unpack receives into our local space */ 502 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 503 source[imdex] = recv_status.MPI_SOURCE; 504 lens[imdex] = n; 505 slen += n; 506 count--; 507 } 508 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 509 510 /* move the data into the send scatter */ 511 lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows); 512 count = 0; 513 for (i=0; i<nrecvs; i++) { 514 values = rvalues + i*nmax; 515 for (j=0; j<lens[i]; j++) { 516 lrows[count++] = values[j] - base; 517 } 518 } 519 ierr = PetscFree(rvalues);CHKERRQ(ierr); 520 ierr = PetscFree(lens);CHKERRQ(ierr); 521 ierr = PetscFree(owner);CHKERRQ(ierr); 522 ierr = PetscFree(nprocs);CHKERRQ(ierr); 523 524 /* actually zap the local rows */ 525 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 526 PLogObjectParent(A,istmp); 527 528 /* 529 Zero the required rows. If the "diagonal block" of the matrix 530 is square and the user wishes to set the diagonal we use seperate 531 code so that MatSetValues() is not called for each diagonal allocating 532 new memory, thus calling lots of mallocs and slowing things down. 
533 534 Contributed by: Mathew Knepley 535 */ 536 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 537 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 538 if (diag && (l->A->M == l->A->N)) { 539 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 540 } else if (diag) { 541 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 542 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 543 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 544 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 545 } 546 for (i = 0; i < slen; i++) { 547 row = lrows[i] + rstart; 548 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 549 } 550 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 551 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 552 } else { 553 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 554 } 555 ierr = ISDestroy(istmp);CHKERRQ(ierr); 556 ierr = PetscFree(lrows);CHKERRQ(ierr); 557 558 /* wait on sends */ 559 if (nsends) { 560 send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 561 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 562 ierr = PetscFree(send_status);CHKERRQ(ierr); 563 } 564 ierr = PetscFree(send_waits);CHKERRQ(ierr); 565 ierr = PetscFree(svalues);CHKERRQ(ierr); 566 567 PetscFunctionReturn(0); 568 } 569 570 #undef __FUNC__ 571 #define __FUNC__ /*<a name=""></a>*/"MatMult_MPIAIJ" 572 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 573 { 574 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 575 int ierr,nt; 576 577 PetscFunctionBegin; 578 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 579 if (nt != A->n) { 580 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 581 } 582 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 583 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 584 ierr = 
VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 585 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNC__ 590 #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIAIJ" 591 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 592 { 593 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 594 int ierr; 595 596 PetscFunctionBegin; 597 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 598 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 599 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 600 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 601 PetscFunctionReturn(0); 602 } 603 604 #undef __FUNC__ 605 #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIAIJ" 606 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 607 { 608 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 609 int ierr; 610 611 PetscFunctionBegin; 612 /* do nondiagonal part */ 613 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 614 /* send it on its way */ 615 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 616 /* do local part */ 617 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 618 /* receive remote parts: note this assumes the values are not actually */ 619 /* inserted in yy until the next line, which is true for my implementation*/ 620 /* but is not perhaps always true. 
*/ 621 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 622 PetscFunctionReturn(0); 623 } 624 625 #undef __FUNC__ 626 #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIAIJ" 627 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 628 { 629 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 630 int ierr; 631 632 PetscFunctionBegin; 633 /* do nondiagonal part */ 634 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 635 /* send it on its way */ 636 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 637 /* do local part */ 638 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 639 /* receive remote parts: note this assumes the values are not actually */ 640 /* inserted in yy until the next line, which is true for my implementation*/ 641 /* but is not perhaps always true. */ 642 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 643 PetscFunctionReturn(0); 644 } 645 646 /* 647 This only works correctly for square matrices where the subblock A->A is the 648 diagonal block 649 */ 650 #undef __FUNC__ 651 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIAIJ" 652 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 653 { 654 int ierr; 655 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 656 657 PetscFunctionBegin; 658 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 659 if (a->rstart != a->cstart || a->rend != a->cend) { 660 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 661 } 662 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 663 PetscFunctionReturn(0); 664 } 665 666 #undef __FUNC__ 667 #define __FUNC__ /*<a name=""></a>*/"MatScale_MPIAIJ" 668 int MatScale_MPIAIJ(Scalar *aa,Mat A) 669 { 670 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 671 int ierr; 672 673 PetscFunctionBegin; 674 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 675 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 676 PetscFunctionReturn(0); 677 } 
/*
   Destroys a parallel AIJ matrix: frees the stash, both sequential blocks,
   the global-to-local column map, the off-processor work vector and its
   scatter context, and any cached row-value workspace.
*/
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
  /* colmap is a PetscTable or a plain int array depending on the build option */
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Views a parallel AIJ matrix through an ASCII, draw, socket or binary
   viewer.  Short info formats print per-rank summaries; otherwise (for
   size > 1) the whole matrix is gathered onto rank 0 and viewed there
   through the viewer's singleton.
*/
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatView_MPIAIJ_ASCIIorDraworSocket"
int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
  int         ierr,format,shift = C->indexshift,rank = aij->rank,size = aij->size;
  PetscTruth  isdraw,isascii,flg;
  Viewer      sviewer;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  if (isascii) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      } else {
        ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = ViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    int        M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    Scalar     *a;

    /* rank 0 owns all rows of the gathered copy; other ranks own none */
    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    /* temporarily convert A's local column indices to global, in place;
       undone below after the rows have been sent */
    for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    /* B's local columns are translated to global via garray into a scratch array */
    ct = cols = (int*)PetscMalloc((ai[m]+1)*sizeof(int));CHKPTRQ(cols);
    for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Dispatches MatView to the shared implementation for the supported viewer types. */
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,Viewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
  if (isascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

/*
    This has to provide several versions.

     2) a) use only local smoothing updating outer values only once.
        b) local smoothing updating outer values each inner iteration
     3) color updating out values between colors.
832 */ 833 #undef __FUNC__ 834 #define __FUNC__ /*<a name=""></a>*/"MatRelax_MPIAIJ" 835 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,Vec xx) 836 { 837 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 838 Mat AA = mat->A,BB = mat->B; 839 Mat_SeqAIJ *A = (Mat_SeqAIJ*)AA->data,*B = (Mat_SeqAIJ *)BB->data; 840 Scalar *b,*x,*xs,*ls,d,*v,sum; 841 int ierr,*idx,*diag; 842 int n = matin->n,m = matin->m,i,shift = A->indexshift; 843 844 PetscFunctionBegin; 845 if (!A->diag) {ierr = MatMarkDiagonal_SeqAIJ(AA);CHKERRQ(ierr);} 846 diag = A->diag; 847 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 848 if (flag & SOR_ZERO_INITIAL_GUESS) { 849 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 850 PetscFunctionReturn(0); 851 } 852 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 853 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 854 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 855 if (xx != bb) { 856 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 857 } else { 858 b = x; 859 } 860 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 861 xs = x + shift; /* shift by one for index start of 1 */ 862 ls = ls + shift; 863 while (its--) { 864 /* go down through the rows */ 865 for (i=0; i<m; i++) { 866 n = A->i[i+1] - A->i[i]; 867 PLogFlops(4*n+3); 868 idx = A->j + A->i[i] + shift; 869 v = A->a + A->i[i] + shift; 870 sum = b[i]; 871 SPARSEDENSEMDOT(sum,xs,v,idx,n); 872 d = fshift + A->a[diag[i]+shift]; 873 n = B->i[i+1] - B->i[i]; 874 idx = B->j + B->i[i] + shift; 875 v = B->a + B->i[i] + shift; 876 SPARSEDENSEMDOT(sum,ls,v,idx,n); 877 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 878 } 879 /* come up through the rows */ 880 for (i=m-1; i>-1; i--) { 881 n = A->i[i+1] - A->i[i]; 882 PLogFlops(4*n+3); 883 idx = A->j + A->i[i] + shift; 884 v = A->a + A->i[i] + shift; 885 sum = b[i]; 886 SPARSEDENSEMDOT(sum,xs,v,idx,n); 887 d = fshift + A->a[diag[i]+shift]; 888 n = B->i[i+1] - B->i[i]; 889 idx = B->j + B->i[i] + shift; 890 v = B->a + B->i[i] + shift; 891 SPARSEDENSEMDOT(sum,ls,v,idx,n); 892 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 893 } 894 } 895 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 896 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 897 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 898 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 899 if (flag & SOR_ZERO_INITIAL_GUESS) { 900 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 901 PetscFunctionReturn(0); 902 } 903 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 904 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 905 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 906 if (xx != bb) { 907 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 908 } else { 909 b = x; 910 } 911 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 912 xs = x + shift; /* shift by one for index start of 1 */ 913 ls = ls + shift; 914 while (its--) { 915 for (i=0; i<m; i++) { 916 n = A->i[i+1] - A->i[i]; 917 PLogFlops(4*n+3); 918 idx = A->j + A->i[i] + shift; 919 v = A->a + A->i[i] + shift; 920 sum = b[i]; 921 SPARSEDENSEMDOT(sum,xs,v,idx,n); 922 d = fshift + A->a[diag[i]+shift]; 923 n = B->i[i+1] - B->i[i]; 924 idx = B->j + B->i[i] + shift; 925 v = B->a + B->i[i] + shift; 926 SPARSEDENSEMDOT(sum,ls,v,idx,n); 927 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 928 } 929 } 930 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 931 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 932 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 933 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 934 if (flag & SOR_ZERO_INITIAL_GUESS) { 935 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 936 PetscFunctionReturn(0); 937 } 938 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 939 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 940 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 941 if (xx != bb) { 942 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 943 } else { 944 b = x; 945 } 946 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 947 xs = x + shift; /* shift by one for index start of 1 */ 948 ls = ls + shift; 949 while (its--) { 950 for (i=m-1; i>-1; i--) { 951 n = A->i[i+1] - A->i[i]; 952 PLogFlops(4*n+3); 953 idx = A->j + A->i[i] + shift; 954 v = A->a + A->i[i] + shift; 955 sum = b[i]; 956 SPARSEDENSEMDOT(sum,xs,v,idx,n); 957 d = fshift + A->a[diag[i]+shift]; 958 n = B->i[i+1] - B->i[i]; 959 idx = B->j + B->i[i] + shift; 960 v = B->a + B->i[i] + shift; 961 SPARSEDENSEMDOT(sum,ls,v,idx,n); 962 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 963 } 964 } 965 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 966 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 967 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 968 } else { 969 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 970 } 971 PetscFunctionReturn(0); 972 } 973 974 #undef __FUNC__ 975 #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIAIJ" 976 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 977 { 978 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 979 Mat A = mat->A,B = mat->B; 980 int ierr; 981 PetscReal isend[5],irecv[5]; 982 983 PetscFunctionBegin; 984 info->block_size = 1.0; 985 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 986 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 987 isend[3] = info->memory; isend[4] = info->mallocs; 988 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 989 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 990 isend[3] += info->memory; isend[4] += info->mallocs; 991 if (flag == MAT_LOCAL) { 992 info->nz_used = isend[0]; 993 info->nz_allocated = isend[1]; 994 info->nz_unneeded = isend[2]; 995 info->memory = isend[3]; 996 info->mallocs = isend[4]; 997 } else if (flag == MAT_GLOBAL_MAX) { 998 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 999 info->nz_used = irecv[0]; 1000 info->nz_allocated = irecv[1]; 1001 info->nz_unneeded = irecv[2]; 1002 info->memory = irecv[3]; 1003 info->mallocs = irecv[4]; 1004 } else if (flag == MAT_GLOBAL_SUM) { 1005 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1006 info->nz_used = irecv[0]; 1007 info->nz_allocated = irecv[1]; 1008 info->nz_unneeded = irecv[2]; 1009 info->memory = irecv[3]; 1010 info->mallocs = irecv[4]; 1011 } 1012 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1013 info->fill_ratio_needed = 0; 1014 
info->factor_mallocs = 0; 1015 info->rows_global = (double)matin->M; 1016 info->columns_global = (double)matin->N; 1017 info->rows_local = (double)matin->m; 1018 info->columns_local = (double)matin->N; 1019 1020 PetscFunctionReturn(0); 1021 } 1022 1023 #undef __FUNC__ 1024 #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIAIJ" 1025 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1026 { 1027 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1028 int ierr; 1029 1030 PetscFunctionBegin; 1031 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1032 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1033 op == MAT_COLUMNS_UNSORTED || 1034 op == MAT_COLUMNS_SORTED || 1035 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1036 op == MAT_KEEP_ZEROED_ROWS || 1037 op == MAT_NEW_NONZERO_LOCATION_ERR || 1038 op == MAT_USE_INODES || 1039 op == MAT_DO_NOT_USE_INODES || 1040 op == MAT_IGNORE_ZERO_ENTRIES) { 1041 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1042 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1043 } else if (op == MAT_ROW_ORIENTED) { 1044 a->roworiented = PETSC_TRUE; 1045 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1046 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1047 } else if (op == MAT_ROWS_SORTED || 1048 op == MAT_ROWS_UNSORTED || 1049 op == MAT_SYMMETRIC || 1050 op == MAT_STRUCTURALLY_SYMMETRIC || 1051 op == MAT_YES_NEW_DIAGONALS) { 1052 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1053 } else if (op == MAT_COLUMN_ORIENTED) { 1054 a->roworiented = PETSC_FALSE; 1055 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1056 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1057 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1058 a->donotstash = PETSC_TRUE; 1059 } else if (op == MAT_NO_NEW_DIAGONALS){ 1060 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1061 } else { 1062 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1063 } 1064 PetscFunctionReturn(0); 1065 } 1066 1067 #undef __FUNC__ 1068 #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIAIJ" 1069 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1070 { 
1071 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1072 1073 PetscFunctionBegin; 1074 if (m) *m = mat->rstart; 1075 if (n) *n = mat->rend; 1076 PetscFunctionReturn(0); 1077 } 1078 1079 #undef __FUNC__ 1080 #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIAIJ" 1081 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1082 { 1083 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1084 Scalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1085 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1086 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1087 int *cmap,*idx_p; 1088 1089 PetscFunctionBegin; 1090 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1091 mat->getrowactive = PETSC_TRUE; 1092 1093 if (!mat->rowvalues && (idx || v)) { 1094 /* 1095 allocate enough space to hold information from the longest row. 1096 */ 1097 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1098 int max = 1,tmp; 1099 for (i=0; i<matin->m; i++) { 1100 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1101 if (max < tmp) { max = tmp; } 1102 } 1103 mat->rowvalues = (Scalar*)PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1104 mat->rowindices = (int*)(mat->rowvalues + max); 1105 } 1106 1107 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1108 lrow = row - rstart; 1109 1110 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1111 if (!v) {pvA = 0; pvB = 0;} 1112 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1113 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1114 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1115 nztot = nzA + nzB; 1116 1117 cmap = mat->garray; 1118 if (v || idx) { 1119 if (nztot) { 1120 /* Sort by increasing column numbers, assuming A and B already sorted */ 1121 int imark = -1; 1122 if (v) { 1123 *v = v_p = mat->rowvalues; 1124 for (i=0; i<nzB; i++) { 1125 if (cmap[cworkB[i]] < 
cstart) v_p[i] = vworkB[i]; 1126 else break; 1127 } 1128 imark = i; 1129 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1130 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1131 } 1132 if (idx) { 1133 *idx = idx_p = mat->rowindices; 1134 if (imark > -1) { 1135 for (i=0; i<imark; i++) { 1136 idx_p[i] = cmap[cworkB[i]]; 1137 } 1138 } else { 1139 for (i=0; i<nzB; i++) { 1140 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1141 else break; 1142 } 1143 imark = i; 1144 } 1145 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1146 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1147 } 1148 } else { 1149 if (idx) *idx = 0; 1150 if (v) *v = 0; 1151 } 1152 } 1153 *nz = nztot; 1154 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1155 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1156 PetscFunctionReturn(0); 1157 } 1158 1159 #undef __FUNC__ 1160 #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIAIJ" 1161 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1162 { 1163 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1164 1165 PetscFunctionBegin; 1166 if (aij->getrowactive == PETSC_FALSE) { 1167 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1168 } 1169 aij->getrowactive = PETSC_FALSE; 1170 PetscFunctionReturn(0); 1171 } 1172 1173 #undef __FUNC__ 1174 #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPIAIJ" 1175 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1176 { 1177 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1178 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1179 int ierr,i,j,cstart = aij->cstart,shift = amat->indexshift; 1180 PetscReal sum = 0.0; 1181 Scalar *v; 1182 1183 PetscFunctionBegin; 1184 if (aij->size == 1) { 1185 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1186 } else { 1187 if (type == NORM_FROBENIUS) { 1188 v = amat->a; 1189 for (i=0; i<amat->nz; i++) { 1190 #if defined(PETSC_USE_COMPLEX) 1191 sum += 
PetscRealPart(PetscConj(*v)*(*v)); v++; 1192 #else 1193 sum += (*v)*(*v); v++; 1194 #endif 1195 } 1196 v = bmat->a; 1197 for (i=0; i<bmat->nz; i++) { 1198 #if defined(PETSC_USE_COMPLEX) 1199 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1200 #else 1201 sum += (*v)*(*v); v++; 1202 #endif 1203 } 1204 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1205 *norm = sqrt(*norm); 1206 } else if (type == NORM_1) { /* max column norm */ 1207 PetscReal *tmp,*tmp2; 1208 int *jj,*garray = aij->garray; 1209 tmp = (PetscReal*)PetscMalloc((mat->N+1)*sizeof(PetscReal));CHKPTRQ(tmp); 1210 tmp2 = (PetscReal*)PetscMalloc((mat->N+1)*sizeof(PetscReal));CHKPTRQ(tmp2); 1211 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1212 *norm = 0.0; 1213 v = amat->a; jj = amat->j; 1214 for (j=0; j<amat->nz; j++) { 1215 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1216 } 1217 v = bmat->a; jj = bmat->j; 1218 for (j=0; j<bmat->nz; j++) { 1219 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1220 } 1221 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1222 for (j=0; j<mat->N; j++) { 1223 if (tmp2[j] > *norm) *norm = tmp2[j]; 1224 } 1225 ierr = PetscFree(tmp);CHKERRQ(ierr); 1226 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1227 } else if (type == NORM_INFINITY) { /* max row norm */ 1228 PetscReal ntemp = 0.0; 1229 for (j=0; j<aij->A->m; j++) { 1230 v = amat->a + amat->i[j] + shift; 1231 sum = 0.0; 1232 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1233 sum += PetscAbsScalar(*v); v++; 1234 } 1235 v = bmat->a + bmat->i[j] + shift; 1236 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1237 sum += PetscAbsScalar(*v); v++; 1238 } 1239 if (sum > ntemp) ntemp = sum; 1240 } 1241 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1242 } else { 1243 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1244 } 1245 } 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNC__ 1250 #define __FUNC__ /*<a 
name=""></a>*/"MatTranspose_MPIAIJ" 1251 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1252 { 1253 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1254 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1255 int ierr,shift = Aloc->indexshift; 1256 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1257 Mat B; 1258 Scalar *array; 1259 1260 PetscFunctionBegin; 1261 if (!matout && M != N) { 1262 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1263 } 1264 1265 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1266 1267 /* copy over the A part */ 1268 Aloc = (Mat_SeqAIJ*)a->A->data; 1269 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1270 row = a->rstart; 1271 for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;} 1272 for (i=0; i<m; i++) { 1273 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1274 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1275 } 1276 aj = Aloc->j; 1277 for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;} 1278 1279 /* copy over the B part */ 1280 Aloc = (Mat_SeqAIJ*)a->B->data; 1281 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1282 row = a->rstart; 1283 ct = cols = (int*)PetscMalloc((1+ai[m]-shift)*sizeof(int));CHKPTRQ(cols); 1284 for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];} 1285 for (i=0; i<m; i++) { 1286 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1287 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1288 } 1289 ierr = PetscFree(ct);CHKERRQ(ierr); 1290 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1291 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1292 if (matout) { 1293 *matout = B; 1294 } else { 1295 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1296 } 1297 PetscFunctionReturn(0); 1298 } 1299 1300 #undef __FUNC__ 1301 #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIAIJ" 1302 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1303 { 1304 
Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1305 Mat a = aij->A,b = aij->B; 1306 int ierr,s1,s2,s3; 1307 1308 PetscFunctionBegin; 1309 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1310 if (rr) { 1311 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1312 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1313 /* Overlap communication with computation. */ 1314 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1315 } 1316 if (ll) { 1317 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1318 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1319 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1320 } 1321 /* scale the diagonal block */ 1322 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1323 1324 if (rr) { 1325 /* Do a scatter end and then right scale the off-diagonal block */ 1326 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1327 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1328 } 1329 1330 PetscFunctionReturn(0); 1331 } 1332 1333 1334 #undef __FUNC__ 1335 #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPIAIJ" 1336 int MatPrintHelp_MPIAIJ(Mat A) 1337 { 1338 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1339 int ierr; 1340 1341 PetscFunctionBegin; 1342 if (!a->rank) { 1343 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1344 } 1345 PetscFunctionReturn(0); 1346 } 1347 1348 #undef __FUNC__ 1349 #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIAIJ" 1350 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1351 { 1352 PetscFunctionBegin; 1353 *bs = 1; 1354 PetscFunctionReturn(0); 1355 } 1356 #undef __FUNC__ 1357 #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPIAIJ" 1358 int MatSetUnfactored_MPIAIJ(Mat A) 1359 { 1360 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1361 int ierr; 1362 1363 PetscFunctionBegin; 1364 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1365 PetscFunctionReturn(0); 1366 } 1367 1368 #undef 
__FUNC__ 1369 #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPIAIJ" 1370 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1371 { 1372 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1373 Mat a,b,c,d; 1374 PetscTruth flg; 1375 int ierr; 1376 1377 PetscFunctionBegin; 1378 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1379 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1380 a = matA->A; b = matA->B; 1381 c = matB->A; d = matB->B; 1382 1383 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1384 if (flg == PETSC_TRUE) { 1385 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1386 } 1387 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1388 PetscFunctionReturn(0); 1389 } 1390 1391 #undef __FUNC__ 1392 #define __FUNC__ /*<a name=""></a>*/"MatCopy_MPIAIJ" 1393 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1394 { 1395 int ierr; 1396 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1397 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1398 PetscTruth flg; 1399 1400 PetscFunctionBegin; 1401 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1402 if (str != SAME_NONZERO_PATTERN || !flg) { 1403 /* because of the column compression in the off-processor part of the matrix a->B, 1404 the number of columns in a->B and b->B may be different, hence we cannot call 1405 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1406 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1407 then copying the submatrices */ 1408 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1409 } else { 1410 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1411 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1412 } 1413 PetscFunctionReturn(0); 1414 } 1415 1416 #undef __FUNC__ 1417 #define __FUNC__ /*<a name="MatSetUpPreallocation_MPIAIJ"></a>*/"MatSetUpPreallocation_MPIAIJ" 1418 int MatSetUpPreallocation_MPIAIJ(Mat A) 1419 { 1420 int ierr; 1421 1422 PetscFunctionBegin; 1423 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1424 PetscFunctionReturn(0); 1425 } 1426 1427 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1428 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1429 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1430 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **); 1431 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1432 1433 /* -------------------------------------------------------------------*/ 1434 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1435 MatGetRow_MPIAIJ, 1436 MatRestoreRow_MPIAIJ, 1437 MatMult_MPIAIJ, 1438 MatMultAdd_MPIAIJ, 1439 MatMultTranspose_MPIAIJ, 1440 MatMultTransposeAdd_MPIAIJ, 1441 0, 1442 0, 1443 0, 1444 0, 1445 0, 1446 0, 1447 MatRelax_MPIAIJ, 1448 MatTranspose_MPIAIJ, 1449 MatGetInfo_MPIAIJ, 1450 MatEqual_MPIAIJ, 1451 MatGetDiagonal_MPIAIJ, 1452 MatDiagonalScale_MPIAIJ, 1453 MatNorm_MPIAIJ, 1454 MatAssemblyBegin_MPIAIJ, 1455 MatAssemblyEnd_MPIAIJ, 1456 0, 1457 MatSetOption_MPIAIJ, 1458 MatZeroEntries_MPIAIJ, 1459 MatZeroRows_MPIAIJ, 1460 0, 1461 0, 1462 0, 1463 0, 1464 MatSetUpPreallocation_MPIAIJ, 1465 0, 1466 MatGetOwnershipRange_MPIAIJ, 1467 0, 1468 0, 1469 0, 1470 0, 1471 MatDuplicate_MPIAIJ, 1472 0, 1473 0, 1474 0, 1475 0, 1476 0, 1477 MatGetSubMatrices_MPIAIJ, 1478 
MatIncreaseOverlap_MPIAIJ, 1479 MatGetValues_MPIAIJ, 1480 MatCopy_MPIAIJ, 1481 MatPrintHelp_MPIAIJ, 1482 MatScale_MPIAIJ, 1483 0, 1484 0, 1485 0, 1486 MatGetBlockSize_MPIAIJ, 1487 0, 1488 0, 1489 0, 1490 0, 1491 MatFDColoringCreate_MPIAIJ, 1492 0, 1493 MatSetUnfactored_MPIAIJ, 1494 0, 1495 0, 1496 MatGetSubMatrix_MPIAIJ, 1497 MatDestroy_MPIAIJ, 1498 MatView_MPIAIJ, 1499 MatGetMaps_Petsc}; 1500 1501 /* ----------------------------------------------------------------------------------------*/ 1502 1503 EXTERN_C_BEGIN 1504 #undef __FUNC__ 1505 #define __FUNC__ /*<a name=""></a>*/"MatStoreValues_MPIAIJ" 1506 int MatStoreValues_MPIAIJ(Mat mat) 1507 { 1508 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1509 int ierr; 1510 1511 PetscFunctionBegin; 1512 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1513 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1514 PetscFunctionReturn(0); 1515 } 1516 EXTERN_C_END 1517 1518 EXTERN_C_BEGIN 1519 #undef __FUNC__ 1520 #define __FUNC__ /*<a name=""></a>*/"MatRetrieveValues_MPIAIJ" 1521 int MatRetrieveValues_MPIAIJ(Mat mat) 1522 { 1523 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1524 int ierr; 1525 1526 PetscFunctionBegin; 1527 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1528 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1529 PetscFunctionReturn(0); 1530 } 1531 EXTERN_C_END 1532 1533 #include "petscpc.h" 1534 EXTERN_C_BEGIN 1535 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1536 EXTERN_C_END 1537 1538 EXTERN_C_BEGIN 1539 #undef __FUNC__ 1540 #define __FUNC__ /*<a name=""></a>*/"MatCreate_MPIAIJ" 1541 int MatCreate_MPIAIJ(Mat B) 1542 { 1543 Mat_MPIAIJ *b; 1544 int ierr,i,size; 1545 1546 PetscFunctionBegin; 1547 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1548 1549 B->data = (void*)(b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b); 1550 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1551 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1552 B->factor = 0; 1553 B->assembled = 
PETSC_FALSE; 1554 B->mapping = 0; 1555 1556 B->insertmode = NOT_SET_VALUES; 1557 b->size = size; 1558 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1559 1560 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1561 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1562 1563 /* the information in the maps duplicates the information computed below, eventually 1564 we should remove the duplicate information that is not contained in the maps */ 1565 ierr = MapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1566 ierr = MapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1567 1568 /* build local table of row and column ownerships */ 1569 b->rowners = (int*)PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 1570 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1571 b->cowners = b->rowners + b->size + 2; 1572 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1573 b->rowners[0] = 0; 1574 for (i=2; i<=b->size; i++) { 1575 b->rowners[i] += b->rowners[i-1]; 1576 } 1577 b->rstart = b->rowners[b->rank]; 1578 b->rend = b->rowners[b->rank+1]; 1579 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1580 b->cowners[0] = 0; 1581 for (i=2; i<=b->size; i++) { 1582 b->cowners[i] += b->cowners[i-1]; 1583 } 1584 b->cstart = b->cowners[b->rank]; 1585 b->cend = b->cowners[b->rank+1]; 1586 1587 /* build cache for off array entries formed */ 1588 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1589 b->donotstash = PETSC_FALSE; 1590 b->colmap = 0; 1591 b->garray = 0; 1592 b->roworiented = PETSC_TRUE; 1593 1594 /* stuff used for matrix vector multiply */ 1595 b->lvec = PETSC_NULL; 1596 b->Mvctx = PETSC_NULL; 1597 1598 /* stuff for MatGetRow() */ 1599 b->rowindices = 0; 1600 b->rowvalues = 0; 1601 b->getrowactive = PETSC_FALSE; 1602 1603 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1604 
"MatStoreValues_MPIAIJ", 1605 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1606 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1607 "MatRetrieveValues_MPIAIJ", 1608 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1609 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1610 "MatGetDiagonalBlock_MPIAIJ", 1611 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1612 PetscFunctionReturn(0); 1613 } 1614 EXTERN_C_END 1615 1616 #undef __FUNC__ 1617 #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIAIJ" 1618 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1619 { 1620 Mat mat; 1621 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1622 int ierr,len = 0; 1623 1624 PetscFunctionBegin; 1625 *newmat = 0; 1626 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1627 ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1628 a = (Mat_MPIAIJ*)mat->data; 1629 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1630 mat->factor = matin->factor; 1631 mat->assembled = PETSC_TRUE; 1632 mat->insertmode = NOT_SET_VALUES; 1633 mat->preallocated = PETSC_TRUE; 1634 1635 a->rstart = oldmat->rstart; 1636 a->rend = oldmat->rend; 1637 a->cstart = oldmat->cstart; 1638 a->cend = oldmat->cend; 1639 a->size = oldmat->size; 1640 a->rank = oldmat->rank; 1641 a->donotstash = oldmat->donotstash; 1642 a->roworiented = oldmat->roworiented; 1643 a->rowindices = 0; 1644 a->rowvalues = 0; 1645 a->getrowactive = PETSC_FALSE; 1646 1647 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1648 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1649 if (oldmat->colmap) { 1650 #if defined (PETSC_USE_CTABLE) 1651 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1652 #else 1653 a->colmap = (int*)PetscMalloc((mat->N)*sizeof(int));CHKPTRQ(a->colmap); 1654 PLogObjectMemory(mat,(mat->N)*sizeof(int)); 1655 ierr = 
PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1656 #endif 1657 } else a->colmap = 0; 1658 if (oldmat->garray) { 1659 len = oldmat->B->n; 1660 a->garray = (int*)PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray); 1661 PLogObjectMemory(mat,len*sizeof(int)); 1662 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1663 } else a->garray = 0; 1664 1665 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1666 PLogObjectParent(mat,a->lvec); 1667 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1668 PLogObjectParent(mat,a->Mvctx); 1669 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1670 PLogObjectParent(mat,a->A); 1671 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1672 PLogObjectParent(mat,a->B); 1673 ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1674 *newmat = mat; 1675 PetscFunctionReturn(0); 1676 } 1677 1678 #include "petscsys.h" 1679 1680 EXTERN_C_BEGIN 1681 #undef __FUNC__ 1682 #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPIAIJ" 1683 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1684 { 1685 Mat A; 1686 Scalar *vals,*svals; 1687 MPI_Comm comm = ((PetscObject)viewer)->comm; 1688 MPI_Status status; 1689 int i,nz,ierr,j,rstart,rend,fd; 1690 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1691 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1692 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1693 1694 PetscFunctionBegin; 1695 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1696 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1697 if (!rank) { 1698 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1699 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1700 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1701 if (header[3] < 0) { 1702 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as 
MPIAIJ"); 1703 } 1704 } 1705 1706 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1707 M = header[1]; N = header[2]; 1708 /* determine ownership of all rows */ 1709 m = M/size + ((M % size) > rank); 1710 rowners = (int*)PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners); 1711 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1712 rowners[0] = 0; 1713 for (i=2; i<=size; i++) { 1714 rowners[i] += rowners[i-1]; 1715 } 1716 rstart = rowners[rank]; 1717 rend = rowners[rank+1]; 1718 1719 /* distribute row lengths to all processors */ 1720 ourlens = (int*)PetscMalloc(2*(rend-rstart)*sizeof(int));CHKPTRQ(ourlens); 1721 offlens = ourlens + (rend-rstart); 1722 if (!rank) { 1723 rowlengths = (int*)PetscMalloc(M*sizeof(int));CHKPTRQ(rowlengths); 1724 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1725 sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts); 1726 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1727 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1728 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1729 } else { 1730 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1731 } 1732 1733 if (!rank) { 1734 /* calculate the number of nonzeros on each processor */ 1735 procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz); 1736 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1737 for (i=0; i<size; i++) { 1738 for (j=rowners[i]; j< rowners[i+1]; j++) { 1739 procsnz[i] += rowlengths[j]; 1740 } 1741 } 1742 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1743 1744 /* determine max buffer needed and allocate it */ 1745 maxnz = 0; 1746 for (i=0; i<size; i++) { 1747 maxnz = PetscMax(maxnz,procsnz[i]); 1748 } 1749 cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols); 1750 1751 /* read in my part of the matrix column indices */ 1752 nz = procsnz[0]; 1753 mycols = 
(int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols); 1754 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1755 1756 /* read in every one elses and ship off */ 1757 for (i=1; i<size; i++) { 1758 nz = procsnz[i]; 1759 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1760 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1761 } 1762 ierr = PetscFree(cols);CHKERRQ(ierr); 1763 } else { 1764 /* determine buffer space needed for message */ 1765 nz = 0; 1766 for (i=0; i<m; i++) { 1767 nz += ourlens[i]; 1768 } 1769 mycols = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols); 1770 1771 /* receive message of column indices*/ 1772 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1773 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1774 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1775 } 1776 1777 /* determine column ownership if matrix is not square */ 1778 if (N != M) { 1779 n = N/size + ((N % size) > rank); 1780 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1781 cstart = cend - n; 1782 } else { 1783 cstart = rstart; 1784 cend = rend; 1785 n = cend - cstart; 1786 } 1787 1788 /* loop over local rows, determining number of off diagonal entries */ 1789 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 1790 jj = 0; 1791 for (i=0; i<m; i++) { 1792 for (j=0; j<ourlens[i]; j++) { 1793 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1794 jj++; 1795 } 1796 } 1797 1798 /* create our matrix */ 1799 for (i=0; i<m; i++) { 1800 ourlens[i] -= offlens[i]; 1801 } 1802 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1803 A = *newmat; 1804 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1805 for (i=0; i<m; i++) { 1806 ourlens[i] += offlens[i]; 1807 } 1808 1809 if (!rank) { 1810 vals = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(vals); 1811 1812 /* read in my part of the matrix numerical values */ 1813 nz = 
procsnz[0]; 1814 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1815 1816 /* insert into matrix */ 1817 jj = rstart; 1818 smycols = mycols; 1819 svals = vals; 1820 for (i=0; i<m; i++) { 1821 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1822 smycols += ourlens[i]; 1823 svals += ourlens[i]; 1824 jj++; 1825 } 1826 1827 /* read in other processors and ship out */ 1828 for (i=1; i<size; i++) { 1829 nz = procsnz[i]; 1830 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1831 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 1832 } 1833 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1834 } else { 1835 /* receive numeric values */ 1836 vals = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(vals); 1837 1838 /* receive message of values*/ 1839 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 1840 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1841 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1842 1843 /* insert into matrix */ 1844 jj = rstart; 1845 smycols = mycols; 1846 svals = vals; 1847 for (i=0; i<m; i++) { 1848 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1849 smycols += ourlens[i]; 1850 svals += ourlens[i]; 1851 jj++; 1852 } 1853 } 1854 ierr = PetscFree(ourlens);CHKERRQ(ierr); 1855 ierr = PetscFree(vals);CHKERRQ(ierr); 1856 ierr = PetscFree(mycols);CHKERRQ(ierr); 1857 ierr = PetscFree(rowners);CHKERRQ(ierr); 1858 1859 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1860 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1861 PetscFunctionReturn(0); 1862 } 1863 EXTERN_C_END 1864 1865 #undef __FUNC__ 1866 #define __FUNC__ /*<a name=""></a>*/"MatGetSubMatrix_MPIAIJ" 1867 /* 1868 Not great since it makes two copies of the submatrix, first an SeqAIJ 1869 in local and then by concatenating the local matrices the end result. 
    Writing it directly would be much like MatGetSubMatrices_MPIAIJ()

    csize  - number of columns of *newmat stored on this processor, or
             PETSC_DECIDE to split the columns evenly across the communicator
    call   - MAT_INITIAL_MATRIX to build a new parallel matrix, or
             MAT_REUSE_MATRIX to refill a matrix returned by an earlier call
             (the sequential submatrix is recovered from the "SubMatrix"
             object composed on *newmat by the initial call)
    newmat - the resulting parallel submatrix (output for MAT_INITIAL_MATRIX,
             input/output for MAT_REUSE_MATRIX)
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int        ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  int        *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
  Mat        *local,M,Mreuse;
  Scalar     *vwork,*aa;
  MPI_Comm   comm = mat->comm;
  Mat_SeqAIJ *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* recover the sequential submatrix saved on *newmat by a previous
       MAT_INITIAL_MATRIX call, and refill it in place */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    /* first extract the full (sequential) submatrix on every processor */
    ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
    ii = aij->i;
    jj = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
    /* prefix sum of the local column counts gives this processor's column range */
    ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths */
    dlens = (int*)PetscMalloc((2*m+1)*sizeof(int));CHKPTRQ(dlens);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        /* columns outside [rstart,rend) land in the off-diagonal block */
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* copy the sequential submatrix, one row at a time, into the parallel matrix */
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
  ii = aij->i;
  jj = aij->j;
  aa = aij->a;
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz = ii[i+1] - ii[i];
    /* cwork/vwork point directly into the CSR storage; jj/aa advance past the row */
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    /* the composition took a reference; drop ours so M owns Mreuse */
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNC__ 1985 #define __FUNC__ /*<a name=""></a>*/"MatMPIAIJSetPreallocation" 1986 /*@C 1987 MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format 1988 (the default parallel PETSc format). For good matrix assembly performance 1989 the user should preallocate the matrix storage by setting the parameters 1990 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1991 performance can be increased by more than a factor of 50. 1992 1993 Collective on MPI_Comm 1994 1995 Input Parameters: 1996 + A - the matrix 1997 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 1998 (same value is used for all local rows) 1999 . d_nnz - array containing the number of nonzeros in the various rows of the 2000 DIAGONAL portion of the local submatrix (possibly different for each row) 2001 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2002 The size of this array is equal to the number of local rows, i.e 'm'. 2003 You must leave room for the diagonal entry even if it is zero. 2004 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2005 submatrix (same value is used for all local rows). 2006 - o_nnz - array containing the number of nonzeros in the various rows of the 2007 OFF-DIAGONAL portion of the local submatrix (possibly different for 2008 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2009 structure. The size of this array is equal to the number 2010 of local rows, i.e 'm'. 2011 2012 The AIJ format (also called the Yale sparse matrix format or 2013 compressed row storage), is fully compatible with standard Fortran 77 2014 storage. That is, the stored row and column indices can begin at 2015 either one (as in Fortran) or zero. See the users manual for details. 2016 2017 The user MUST specify either the local or global matrix dimensions 2018 (possibly both). 
2019 2020 The parallel matrix is partitioned such that the first m0 rows belong to 2021 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2022 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2023 2024 The DIAGONAL portion of the local submatrix of a processor can be defined 2025 as the submatrix which is obtained by extraction the part corresponding 2026 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2027 first row that belongs to the processor, and r2 is the last row belonging 2028 to the this processor. This is a square mxm matrix. The remaining portion 2029 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2030 2031 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2032 2033 By default, this format uses inodes (identical nodes) when possible. 2034 We search for consecutive rows with the same nonzero structure, thereby 2035 reusing matrix information to achieve increased efficiency. 2036 2037 Options Database Keys: 2038 + -mat_aij_no_inode - Do not use inodes 2039 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2040 - -mat_aij_oneindex - Internally use indexing starting at 1 2041 rather than 0. Note that when calling MatSetValues(), 2042 the user still MUST index entries starting at 0! 2043 2044 Example usage: 2045 2046 Consider the following 8x8 matrix with 34 non-zero values, that is 2047 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2048 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 2049 as follows: 2050 2051 .vb 2052 1 2 0 | 0 3 0 | 0 4 2053 Proc0 0 5 6 | 7 0 0 | 8 0 2054 9 0 10 | 11 0 0 | 12 0 2055 ------------------------------------- 2056 13 0 14 | 15 16 17 | 0 0 2057 Proc1 0 18 0 | 19 20 21 | 0 0 2058 0 0 0 | 22 23 0 | 24 0 2059 ------------------------------------- 2060 Proc2 25 26 27 | 0 0 28 | 29 0 2061 30 0 0 | 31 32 33 | 0 34 2062 .ve 2063 2064 This can be represented as a collection of submatrices as: 2065 2066 .vb 2067 A B C 2068 D E F 2069 G H I 2070 .ve 2071 2072 Where the submatrices A,B,C are owned by proc0, D,E,F are 2073 owned by proc1, G,H,I are owned by proc2. 2074 2075 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2076 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2077 The 'M','N' parameters are 8,8, and have the same values on all procs. 2078 2079 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2080 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2081 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2082 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2083 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2084 matrix, ans [DF] as another SeqAIJ matrix. 2085 2086 When d_nz, o_nz parameters are specified, d_nz storage elements are 2087 allocated for every row of the local diagonal submatrix, and o_nz 2088 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2089 One way to choose d_nz and o_nz is to use the max nonzerors per local 2090 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2091 In this case, the values of d_nz,o_nz are: 2092 .vb 2093 proc0 : dnz = 2, o_nz = 2 2094 proc1 : dnz = 3, o_nz = 2 2095 proc2 : dnz = 1, o_nz = 4 2096 .ve 2097 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2098 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2099 for proc3. 
i.e we are using 12+15+10=37 storage locations to store 2100 34 values. 2101 2102 When d_nnz, o_nnz parameters are specified, the storage is specified 2103 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2104 In the above case the values for d_nnz,o_nnz are: 2105 .vb 2106 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2107 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2108 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2109 .ve 2110 Here the space allocated is sum of all the above values i.e 34, and 2111 hence pre-allocation is perfect. 2112 2113 Level: intermediate 2114 2115 .keywords: matrix, aij, compressed row, sparse, parallel 2116 2117 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2118 @*/ 2119 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 2120 { 2121 Mat_MPIAIJ *b; 2122 int ierr,i; 2123 PetscTruth flg2; 2124 2125 PetscFunctionBegin; 2126 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr); 2127 if (!flg2) PetscFunctionReturn(0); 2128 B->preallocated = PETSC_TRUE; 2129 if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than -2: value %d",d_nz); 2130 if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than -2: value %d",o_nz); 2131 if (d_nnz) { 2132 for (i=0; i<B->m; i++) { 2133 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 2134 } 2135 } 2136 if (o_nnz) { 2137 for (i=0; i<B->m; i++) { 2138 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 2139 } 2140 } 2141 b = (Mat_MPIAIJ*)B->data; 2142 2143 if (d_nz == PETSC_DEFAULT) d_nz = 5; 2144 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2145 PLogObjectParent(B,b->A); 2146 if (o_nz == PETSC_DEFAULT) o_nz = 0; 2147 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2148 PLogObjectParent(B,b->B); 2149 2150 
PetscFunctionReturn(0); 2151 } 2152 2153 #undef __FUNC__ 2154 #define __FUNC__ /*<a name=""></a>*/"MatCreateMPIAIJ" 2155 /*@C 2156 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2157 (the default parallel PETSc format). For good matrix assembly performance 2158 the user should preallocate the matrix storage by setting the parameters 2159 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2160 performance can be increased by more than a factor of 50. 2161 2162 Collective on MPI_Comm 2163 2164 Input Parameters: 2165 + comm - MPI communicator 2166 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2167 This value should be the same as the local size used in creating the 2168 y vector for the matrix-vector product y = Ax. 2169 . n - This value should be the same as the local size used in creating the 2170 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2171 calculated if N is given) For square matrices n is almost always m. 2172 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2173 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2174 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2175 (same value is used for all local rows) 2176 . d_nnz - array containing the number of nonzeros in the various rows of the 2177 DIAGONAL portion of the local submatrix (possibly different for each row) 2178 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2179 The size of this array is equal to the number of local rows, i.e 'm'. 2180 You must leave room for the diagonal entry even if it is zero. 2181 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2182 submatrix (same value is used for all local rows). 
2183 - o_nnz - array containing the number of nonzeros in the various rows of the 2184 OFF-DIAGONAL portion of the local submatrix (possibly different for 2185 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2186 structure. The size of this array is equal to the number 2187 of local rows, i.e 'm'. 2188 2189 Output Parameter: 2190 . A - the matrix 2191 2192 Notes: 2193 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2194 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2195 storage requirements for this matrix. 2196 2197 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2198 processor than it must be used on all processors that share the object for 2199 that argument. 2200 2201 The AIJ format (also called the Yale sparse matrix format or 2202 compressed row storage), is fully compatible with standard Fortran 77 2203 storage. That is, the stored row and column indices can begin at 2204 either one (as in Fortran) or zero. See the users manual for details. 2205 2206 The user MUST specify either the local or global matrix dimensions 2207 (possibly both). 2208 2209 The parallel matrix is partitioned such that the first m0 rows belong to 2210 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2211 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2212 2213 The DIAGONAL portion of the local submatrix of a processor can be defined 2214 as the submatrix which is obtained by extraction the part corresponding 2215 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2216 first row that belongs to the processor, and r2 is the last row belonging 2217 to the this processor. This is a square mxm matrix. The remaining portion 2218 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2219 2220 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 
2221 2222 By default, this format uses inodes (identical nodes) when possible. 2223 We search for consecutive rows with the same nonzero structure, thereby 2224 reusing matrix information to achieve increased efficiency. 2225 2226 Options Database Keys: 2227 + -mat_aij_no_inode - Do not use inodes 2228 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2229 - -mat_aij_oneindex - Internally use indexing starting at 1 2230 rather than 0. Note that when calling MatSetValues(), 2231 the user still MUST index entries starting at 0! 2232 2233 2234 Example usage: 2235 2236 Consider the following 8x8 matrix with 34 non-zero values, that is 2237 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2238 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2239 as follows: 2240 2241 .vb 2242 1 2 0 | 0 3 0 | 0 4 2243 Proc0 0 5 6 | 7 0 0 | 8 0 2244 9 0 10 | 11 0 0 | 12 0 2245 ------------------------------------- 2246 13 0 14 | 15 16 17 | 0 0 2247 Proc1 0 18 0 | 19 20 21 | 0 0 2248 0 0 0 | 22 23 0 | 24 0 2249 ------------------------------------- 2250 Proc2 25 26 27 | 0 0 28 | 29 0 2251 30 0 0 | 31 32 33 | 0 34 2252 .ve 2253 2254 This can be represented as a collection of submatrices as: 2255 2256 .vb 2257 A B C 2258 D E F 2259 G H I 2260 .ve 2261 2262 Where the submatrices A,B,C are owned by proc0, D,E,F are 2263 owned by proc1, G,H,I are owned by proc2. 2264 2265 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2266 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2267 The 'M','N' parameters are 8,8, and have the same values on all procs. 2268 2269 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2270 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2271 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2272 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2273 part as SeqAIJ matrices. 
for eg: proc1 will store [E] as a SeqAIJ 2274 matrix, ans [DF] as another SeqAIJ matrix. 2275 2276 When d_nz, o_nz parameters are specified, d_nz storage elements are 2277 allocated for every row of the local diagonal submatrix, and o_nz 2278 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2279 One way to choose d_nz and o_nz is to use the max nonzerors per local 2280 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2281 In this case, the values of d_nz,o_nz are: 2282 .vb 2283 proc0 : dnz = 2, o_nz = 2 2284 proc1 : dnz = 3, o_nz = 2 2285 proc2 : dnz = 1, o_nz = 4 2286 .ve 2287 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2288 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2289 for proc3. i.e we are using 12+15+10=37 storage locations to store 2290 34 values. 2291 2292 When d_nnz, o_nnz parameters are specified, the storage is specified 2293 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2294 In the above case the values for d_nnz,o_nnz are: 2295 .vb 2296 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2297 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2298 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2299 .ve 2300 Here the space allocated is sum of all the above values i.e 34, and 2301 hence pre-allocation is perfect. 
2302 2303 Level: intermediate 2304 2305 .keywords: matrix, aij, compressed row, sparse, parallel 2306 2307 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2308 @*/ 2309 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2310 { 2311 int ierr,size; 2312 2313 PetscFunctionBegin; 2314 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2315 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2316 if (size > 1) { 2317 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2318 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2319 } else { 2320 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2321 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2322 } 2323 PetscFunctionReturn(0); 2324 } 2325