/*$Id: mpiaij.c,v 1.323 2000/11/28 17:29:00 bsmith Exp bsmith $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
EXTERN int DisAssemble_MPIAIJ(Mat);
EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
EXTERN int MatPrintHelp_SeqAIJ(Mat);

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each
  processor has an order-N integer array, but access is fast).
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(Scalar))+(am+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(Scalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = 
PetscFree(a->i);CHKERRQ(ierr); \ 95 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 96 } \ 97 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 98 a->singlemalloc = PETSC_TRUE; \ 99 \ 100 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 101 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 102 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 103 a->maxnz += CHUNKSIZE; \ 104 a->reallocs++; \ 105 } \ 106 N = nrow++ - 1; a->nz++; \ 107 /* shift up all the later entries in this row */ \ 108 for (ii=N; ii>=_i; ii--) { \ 109 rp[ii+1] = rp[ii]; \ 110 ap[ii+1] = ap[ii]; \ 111 } \ 112 rp[_i] = col1; \ 113 ap[_i] = value; \ 114 a_noinsert: ; \ 115 ailen[row] = nrow; \ 116 } 117 118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 119 { \ 120 \ 121 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 122 rmax = bimax[row]; nrow = bilen[row]; \ 123 col1 = col - shift; \ 124 \ 125 low = 0; high = nrow; \ 126 while (high-low > 5) { \ 127 t = (low+high)/2; \ 128 if (rp[t] > col) high = t; \ 129 else low = t; \ 130 } \ 131 for (_i=low; _i<high; _i++) { \ 132 if (rp[_i] > col1) break; \ 133 if (rp[_i] == col1) { \ 134 if (addv == ADD_VALUES) ap[_i] += value; \ 135 else ap[_i] = value; \ 136 goto b_noinsert; \ 137 } \ 138 } \ 139 if (nonew == 1) goto b_noinsert; \ 140 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 141 if (nrow >= rmax) { \ 142 /* there is no extra room in row, therefore enlarge */ \ 143 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 144 Scalar *new_a; \ 145 \ 146 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 147 \ 148 /* malloc new storage space */ \ 149 len = new_nz*(sizeof(int)+sizeof(Scalar))+(bm+1)*sizeof(int); \ 150 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 151 new_j = (int*)(new_a + new_nz); \ 152 new_i = new_j + new_nz; \ 153 \ 154 /* copy over old data into new slots */ \ 155 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 156 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 157 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 158 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 159 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 160 len*sizeof(int));CHKERRQ(ierr); \ 161 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 163 len*sizeof(Scalar));CHKERRQ(ierr); \ 164 /* free up old matrix storage */ \ 165 \ 166 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 167 if (!b->singlemalloc) { \ 168 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 169 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 170 } \ 171 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 172 b->singlemalloc = PETSC_TRUE; \ 173 \ 174 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 175 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 176 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 177 b->maxnz += CHUNKSIZE; \ 178 b->reallocs++; \ 179 } \ 180 N = nrow++ - 1; b->nz++; \ 181 /* shift up all the later entries in this row */ \ 182 for (ii=N; ii>=_i; ii--) { \ 183 rp[ii+1] = rp[ii]; \ 184 ap[ii+1] = ap[ii]; \ 185 } \ 186 rp[_i] = col1; \ 187 ap[_i] = value; \ 188 b_noinsert: ; \ 189 bilen[row] = nrow; \ 190 } 191 192 #undef __FUNC__ 193 #define __FUNC__ "MatSetValues_MPIAIJ" 194 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 195 { 196 
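  /*
     A sketch of the routing performed by the loops below (illustrative only; the sizes
     and values in the example are hypothetical): each (im[i],in[j]) pair goes to one of
       - the "diagonal" block aij->A, when the row is local and cstart <= in[j] < cend;
       - the "off-diagonal" block aij->B, when the row is local but the column is not
         (after a previous assembly the global column is translated through colmap/garray);
       - the stash, to be shipped to the owning process at assembly time, when the row
         itself is not local.
     For instance, on a 4x4 matrix split two rows per process, a call on process 0 such as
         MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES);
     with row=0,col=3 fills aij->B locally, while an entry with row=3 is stashed for
     process 1 (assuming stashing has not been turned off).
  */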
Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 197 Scalar value; 198 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 199 int cstart = aij->cstart,cend = aij->cend,row,col; 200 PetscTruth roworiented = aij->roworiented; 201 202 /* Some Variables required in the macro */ 203 Mat A = aij->A; 204 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 205 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 206 Scalar *aa = a->a; 207 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 208 Mat B = aij->B; 209 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 210 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 211 Scalar *ba = b->a; 212 213 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 214 int nonew = a->nonew,shift = a->indexshift; 215 Scalar *ap; 216 217 PetscFunctionBegin; 218 for (i=0; i<m; i++) { 219 if (im[i] < 0) continue; 220 #if defined(PETSC_USE_BOPT_g) 221 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 222 #endif 223 if (im[i] >= rstart && im[i] < rend) { 224 row = im[i] - rstart; 225 for (j=0; j<n; j++) { 226 if (in[j] >= cstart && in[j] < cend){ 227 col = in[j] - cstart; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 if (ignorezeroentries && value == 0.0) continue; 230 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } else if (in[j] < 0) continue; 233 #if defined(PETSC_USE_BOPT_g) 234 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");} 235 #endif 236 else { 237 if (mat->was_assembled) { 238 if (!aij->colmap) { 239 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 240 } 241 #if defined (PETSC_USE_CTABLE) 242 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 243 col--; 244 #else 245 col = aij->colmap[in[j]] - 1; 246 #endif 247 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 248 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 249 col = in[j]; 250 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 251 B = aij->B; 252 b = (Mat_SeqAIJ*)B->data; 253 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 254 ba = b->a; 255 } 256 } else col = in[j]; 257 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 258 if (ignorezeroentries && value == 0.0) continue; 259 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 260 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 261 } 262 } 263 } else { 264 if (!aij->donotstash) { 265 if (roworiented) { 266 if (ignorezeroentries && v[i*n] == 0.0) continue; 267 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 268 } else { 269 if (ignorezeroentries && v[i] == 0.0) continue; 270 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 271 } 272 } 273 } 274 } 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNC__ 279 #define __FUNC__ "MatGetValues_MPIAIJ" 280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 281 { 282 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 283 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 284 int cstart = aij->cstart,cend = aij->cend,row,col; 285 286 PetscFunctionBegin; 287 for (i=0; i<m; i++) { 288 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 289 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 290 if (idxm[i] >= rstart && idxm[i] < rend) { 291 row = idxm[i] - rstart; 292 for (j=0; j<n; j++) { 
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  int         ierr,nstash,reallocs;
  InsertMode  addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}


#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
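     (The MPI_Allreduce with MPI_PROD below over the per-process was_assembled flags is
      zero as soon as any process's flag is zero, so every process can detect that some
      process has disassembled without any additional messages.)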
*/ 382 /* 383 if nonzero structure of submatrix B cannot change then we know that 384 no processor disassembled thus we can skip this stuff 385 */ 386 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 387 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 388 if (mat->was_assembled && !other_disassembled) { 389 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 390 } 391 } 392 393 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 394 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 395 } 396 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 397 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 398 399 if (aij->rowvalues) { 400 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 401 aij->rowvalues = 0; 402 } 403 PetscFunctionReturn(0); 404 } 405 406 #undef __FUNC__ 407 #define __FUNC__ "MatZeroEntries_MPIAIJ" 408 int MatZeroEntries_MPIAIJ(Mat A) 409 { 410 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 411 int ierr; 412 413 PetscFunctionBegin; 414 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 415 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 416 PetscFunctionReturn(0); 417 } 418 419 #undef __FUNC__ 420 #define __FUNC__ "MatZeroRows_MPIAIJ" 421 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 422 { 423 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 424 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 425 int *procs,*nprocs,j,idx,nsends,*work,row; 426 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 427 int *rvalues,tag = A->tag,count,base,slen,n,*source; 428 int *lens,imdex,*lrows,*values,rstart=l->rstart; 429 MPI_Comm comm = A->comm; 430 MPI_Request *send_waits,*recv_waits; 431 MPI_Status recv_status,*send_status; 432 IS istmp; 433 PetscTruth found; 434 435 PetscFunctionBegin; 436 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 437 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 438 439 /* first count number of contributors to each processor */ 440 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 441 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 442 procs = nprocs + size; 443 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 444 for (i=0; i<N; i++) { 445 idx = rows[i]; 446 found = PETSC_FALSE; 447 for (j=0; j<size; j++) { 448 if (idx >= owners[j] && idx < owners[j+1]) { 449 nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break; 450 } 451 } 452 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 453 } 454 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 455 456 /* inform other processors of number of messages and max length*/ 457 ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr); 458 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 459 nrecvs = work[size+rank]; 460 nmax = work[rank]; 461 ierr = PetscFree(work);CHKERRQ(ierr); 462 463 /* post receives: */ 464 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 465 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 466 for (i=0; i<nrecvs; i++) { 467 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 468 } 469 470 /* do sends: 471 1) starts[i] gives the starting index in svalues for stuff going to 472 the ith processor 473 */ 474 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 475 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 476 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 477 starts[0] = 0; 478 for (i=1; i<size; i++) { 
starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory each time, causing many mallocs and
     slowing things down.
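     (Sketch of the two paths below: when the diagonal block is square, the diag value is
      passed straight to MatZeroRows() on l->A; in the rectangular case the rows are zeroed
      first and the diagonal entries are then inserted one at a time with MatSetValues(),
      which requires that new nonzero locations be allowed.)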
534 535 Contributed by: Mathew Knepley 536 */ 537 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 538 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 539 if (diag && (l->A->M == l->A->N)) { 540 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 541 } else if (diag) { 542 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 543 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 544 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 545 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 546 } 547 for (i = 0; i < slen; i++) { 548 row = lrows[i] + rstart; 549 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 550 } 551 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 552 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 553 } else { 554 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 555 } 556 ierr = ISDestroy(istmp);CHKERRQ(ierr); 557 ierr = PetscFree(lrows);CHKERRQ(ierr); 558 559 /* wait on sends */ 560 if (nsends) { 561 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 562 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 563 ierr = PetscFree(send_status);CHKERRQ(ierr); 564 } 565 ierr = PetscFree(send_waits);CHKERRQ(ierr); 566 ierr = PetscFree(svalues);CHKERRQ(ierr); 567 568 PetscFunctionReturn(0); 569 } 570 571 #undef __FUNC__ 572 #define __FUNC__ "MatMult_MPIAIJ" 573 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 574 { 575 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 576 int ierr,nt; 577 578 PetscFunctionBegin; 579 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 580 if (nt != A->n) { 581 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 582 } 583 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 584 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 585 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 586 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 587 PetscFunctionReturn(0); 588 } 589 590 #undef __FUNC__ 591 #define __FUNC__ "MatMultAdd_MPIAIJ" 592 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 593 { 594 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 595 int ierr; 596 597 PetscFunctionBegin; 598 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 599 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 600 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 601 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 602 PetscFunctionReturn(0); 603 } 604 605 #undef __FUNC__ 606 #define __FUNC__ "MatMultTranspose_MPIAIJ" 607 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 608 { 609 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 610 int ierr; 611 612 PetscFunctionBegin; 613 /* do nondiagonal part */ 614 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 615 /* send it on its way */ 616 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 617 /* do local part */ 618 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 619 /* receive remote parts: note this assumes the values are not actually */ 620 /* inserted in yy until the next line, which is true for my implementation*/ 621 /* but is not perhaps always true. 
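     In total the routine forms yy = A^T xx plus the reverse scatter-add of B^T xx, i.e.
     the contributions destined for rows of yy owned by other processes (a summary of the
     calls above and below).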
*/ 622 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 623 PetscFunctionReturn(0); 624 } 625 626 #undef __FUNC__ 627 #define __FUNC__ "MatMultTransposeAdd_MPIAIJ" 628 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 629 { 630 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 631 int ierr; 632 633 PetscFunctionBegin; 634 /* do nondiagonal part */ 635 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 636 /* send it on its way */ 637 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 638 /* do local part */ 639 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 640 /* receive remote parts: note this assumes the values are not actually */ 641 /* inserted in yy until the next line, which is true for my implementation*/ 642 /* but is not perhaps always true. */ 643 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 644 PetscFunctionReturn(0); 645 } 646 647 /* 648 This only works correctly for square matrices where the subblock A->A is the 649 diagonal block 650 */ 651 #undef __FUNC__ 652 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 653 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 654 { 655 int ierr; 656 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 657 658 PetscFunctionBegin; 659 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 660 if (a->rstart != a->cstart || a->rend != a->cend) { 661 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 662 } 663 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 664 PetscFunctionReturn(0); 665 } 666 667 #undef __FUNC__ 668 #define __FUNC__ "MatScale_MPIAIJ" 669 int MatScale_MPIAIJ(Scalar *aa,Mat A) 670 { 671 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 672 int ierr; 673 674 PetscFunctionBegin; 675 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 676 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 677 PetscFunctionReturn(0); 678 } 679 680 #undef __FUNC__ 681 #define __FUNC__ "MatDestroy_MPIAIJ" 682 int MatDestroy_MPIAIJ(Mat mat) 683 { 684 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 685 int ierr; 686 687 PetscFunctionBegin; 688 #if defined(PETSC_USE_LOG) 689 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 690 #endif 691 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 692 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 693 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 694 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 695 #if defined (PETSC_USE_CTABLE) 696 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 697 #else 698 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 699 #endif 700 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 701 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 702 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 703 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 704 ierr = PetscFree(aij);CHKERRQ(ierr); 705 PetscFunctionReturn(0); 706 } 707 708 #undef __FUNC__ 709 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket" 710 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 711 { 712 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 713 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 714 int ierr,format,shift = C->indexshift,rank = aij->rank,size = aij->size; 715 PetscTruth isdraw,isascii,flg; 716 PetscViewer sviewer; 717 718 PetscFunctionBegin; 719 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 720 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 721 if (isascii) { 722 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 723 if (format == PETSC_VIEWER_FORMAT_ASCII_INFO_LONG) { 724 MatInfo info; 725 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 726 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 727 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 728 if (flg) { 729 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 730 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 731 } else { 732 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 733 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 734 } 735 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 736 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 737 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 738 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 739 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 740 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 741 PetscFunctionReturn(0); 742 } else if (format == PETSC_VIEWER_FORMAT_ASCII_INFO) { 743 PetscFunctionReturn(0); 744 } 745 } else if (isdraw) { 746 PetscDraw draw; 747 PetscTruth isnull; 748 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 749 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 750 } 751 752 if (size == 1) { 753 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 754 } else { 755 /* assemble the entire matrix onto first processor. 
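        Note: this path gathers the whole matrix onto process 0 through MatSetValues(), so
        it is really only reasonable for matrices small enough to print or draw; the INFO
        formats handled above avoid the gather.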
    */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    Scalar      *a;

    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
    ct = cols;
    for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (isascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

/*
    This has to provide several versions.

     2) a) use only local smoothing, updating outer values only once.
        b) local smoothing, updating outer values each inner iteration
     3) color updating, outer values updated between colors.
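
    Of these, only the local sweeps are implemented below (SOR_LOCAL_SYMMETRIC_SWEEP,
    SOR_LOCAL_FORWARD_SWEEP and SOR_LOCAL_BACKWARD_SWEEP, each with a zero-initial-guess
    shortcut that defers to the diagonal block's relax routine); a true parallel SOR is
    reported as unsupported.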
834 */ 835 #undef __FUNC__ 836 #define __FUNC__ "MatRelax_MPIAIJ" 837 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,Vec xx) 838 { 839 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 840 Mat AA = mat->A,BB = mat->B; 841 Mat_SeqAIJ *A = (Mat_SeqAIJ*)AA->data,*B = (Mat_SeqAIJ *)BB->data; 842 Scalar *b,*x,*xs,*ls,d,*v,sum; 843 int ierr,*idx,*diag; 844 int n = matin->n,m = matin->m,i,shift = A->indexshift; 845 846 PetscFunctionBegin; 847 if (!A->diag) {ierr = MatMarkDiagonal_SeqAIJ(AA);CHKERRQ(ierr);} 848 diag = A->diag; 849 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 850 if (flag & SOR_ZERO_INITIAL_GUESS) { 851 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 852 PetscFunctionReturn(0); 853 } 854 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 855 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 856 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 857 if (xx != bb) { 858 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 859 } else { 860 b = x; 861 } 862 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 863 xs = x + shift; /* shift by one for index start of 1 */ 864 ls = ls + shift; 865 while (its--) { 866 /* go down through the rows */ 867 for (i=0; i<m; i++) { 868 n = A->i[i+1] - A->i[i]; 869 PetscLogFlops(4*n+3); 870 idx = A->j + A->i[i] + shift; 871 v = A->a + A->i[i] + shift; 872 sum = b[i]; 873 SPARSEDENSEMDOT(sum,xs,v,idx,n); 874 d = fshift + A->a[diag[i]+shift]; 875 n = B->i[i+1] - B->i[i]; 876 idx = B->j + B->i[i] + shift; 877 v = B->a + B->i[i] + shift; 878 SPARSEDENSEMDOT(sum,ls,v,idx,n); 879 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 880 } 881 /* come up through the rows */ 882 for (i=m-1; i>-1; i--) { 883 n = A->i[i+1] - A->i[i]; 884 PetscLogFlops(4*n+3); 885 idx = A->j + A->i[i] + shift; 886 v = A->a + A->i[i] + shift; 887 sum = b[i]; 888 SPARSEDENSEMDOT(sum,xs,v,idx,n); 889 d = fshift + A->a[diag[i]+shift]; 890 n = B->i[i+1] - B->i[i]; 891 idx = B->j + B->i[i] + shift; 892 v = B->a + B->i[i] + shift; 893 SPARSEDENSEMDOT(sum,ls,v,idx,n); 894 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 895 } 896 } 897 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 898 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 899 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 900 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 901 if (flag & SOR_ZERO_INITIAL_GUESS) { 902 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 903 PetscFunctionReturn(0); 904 } 905 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 906 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 907 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 908 if (xx != bb) { 909 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 910 } else { 911 b = x; 912 } 913 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 914 xs = x + shift; /* shift by one for index start of 1 */ 915 ls = ls + shift; 916 while (its--) { 917 for (i=0; i<m; i++) { 918 n = A->i[i+1] - A->i[i]; 919 PetscLogFlops(4*n+3); 920 idx = A->j + A->i[i] + shift; 921 v = A->a + A->i[i] + shift; 922 sum = b[i]; 923 SPARSEDENSEMDOT(sum,xs,v,idx,n); 924 d = fshift + A->a[diag[i]+shift]; 925 n = B->i[i+1] - B->i[i]; 926 idx = B->j + B->i[i] + shift; 927 v = B->a + B->i[i] + shift; 928 SPARSEDENSEMDOT(sum,ls,v,idx,n); 929 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 930 } 931 } 932 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 933 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 934 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 935 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 936 if (flag & SOR_ZERO_INITIAL_GUESS) { 937 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 938 PetscFunctionReturn(0); 939 } 940 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 941 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 942 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 943 if (xx != bb) { 944 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 945 } else { 946 b = x; 947 } 948 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 949 xs = x + shift; /* shift by one for index start of 1 */ 950 ls = ls + shift; 951 while (its--) { 952 for (i=m-1; i>-1; i--) { 953 n = A->i[i+1] - A->i[i]; 954 PetscLogFlops(4*n+3); 955 idx = A->j + A->i[i] + shift; 956 v = A->a + A->i[i] + shift; 957 sum = b[i]; 958 SPARSEDENSEMDOT(sum,xs,v,idx,n); 959 d = fshift + A->a[diag[i]+shift]; 960 n = B->i[i+1] - B->i[i]; 961 idx = B->j + B->i[i] + shift; 962 v = B->a + B->i[i] + shift; 963 SPARSEDENSEMDOT(sum,ls,v,idx,n); 964 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 965 } 966 } 967 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 968 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 969 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 970 } else { 971 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 972 } 973 PetscFunctionReturn(0); 974 } 975 976 #undef __FUNC__ 977 #define __FUNC__ "MatGetInfo_MPIAIJ" 978 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 979 { 980 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 981 Mat A = mat->A,B = mat->B; 982 int ierr; 983 PetscReal isend[5],irecv[5]; 984 985 PetscFunctionBegin; 986 info->block_size = 1.0; 987 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 988 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 989 isend[3] = info->memory; isend[4] = info->mallocs; 990 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 991 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 992 isend[3] += info->memory; isend[4] += info->mallocs; 993 if (flag == MAT_LOCAL) { 994 info->nz_used = isend[0]; 995 info->nz_allocated = isend[1]; 996 info->nz_unneeded = isend[2]; 997 info->memory = isend[3]; 998 info->mallocs = isend[4]; 999 } else if (flag == MAT_GLOBAL_MAX) { 1000 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1001 info->nz_used = irecv[0]; 1002 info->nz_allocated = irecv[1]; 1003 info->nz_unneeded = irecv[2]; 1004 info->memory = irecv[3]; 1005 info->mallocs = irecv[4]; 1006 } else if (flag == MAT_GLOBAL_SUM) { 1007 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1008 info->nz_used = irecv[0]; 1009 info->nz_allocated = irecv[1]; 1010 info->nz_unneeded = irecv[2]; 1011 info->memory = irecv[3]; 1012 info->mallocs = irecv[4]; 1013 } 1014 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1015 info->fill_ratio_needed = 0; 1016 info->factor_mallocs = 0; 1017 info->rows_global = (double)matin->M; 1018 info->columns_global = (double)matin->N; 1019 info->rows_local = (double)matin->m; 1020 info->columns_local = (double)matin->N; 1021 1022 PetscFunctionReturn(0); 1023 } 1024 1025 #undef 
__FUNC__ 1026 #define __FUNC__ "MatSetOption_MPIAIJ" 1027 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1028 { 1029 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1030 int ierr; 1031 1032 PetscFunctionBegin; 1033 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1034 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1035 op == MAT_COLUMNS_UNSORTED || 1036 op == MAT_COLUMNS_SORTED || 1037 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1038 op == MAT_KEEP_ZEROED_ROWS || 1039 op == MAT_NEW_NONZERO_LOCATION_ERR || 1040 op == MAT_USE_INODES || 1041 op == MAT_DO_NOT_USE_INODES || 1042 op == MAT_IGNORE_ZERO_ENTRIES) { 1043 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1044 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1045 } else if (op == MAT_ROW_ORIENTED) { 1046 a->roworiented = PETSC_TRUE; 1047 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1048 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1049 } else if (op == MAT_ROWS_SORTED || 1050 op == MAT_ROWS_UNSORTED || 1051 op == MAT_SYMMETRIC || 1052 op == MAT_STRUCTURALLY_SYMMETRIC || 1053 op == MAT_YES_NEW_DIAGONALS) { 1054 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1055 } else if (op == MAT_COLUMN_ORIENTED) { 1056 a->roworiented = PETSC_FALSE; 1057 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1058 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1059 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1060 a->donotstash = PETSC_TRUE; 1061 } else if (op == MAT_NO_NEW_DIAGONALS){ 1062 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1063 } else { 1064 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1065 } 1066 PetscFunctionReturn(0); 1067 } 1068 1069 #undef __FUNC__ 1070 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1071 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1072 { 1073 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1074 1075 PetscFunctionBegin; 1076 if (m) *m = mat->rstart; 1077 if (n) *n = mat->rend; 1078 PetscFunctionReturn(0); 1079 } 1080 1081 #undef __FUNC__ 1082 #define __FUNC__ "MatGetRow_MPIAIJ" 1083 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1084 { 1085 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1086 Scalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1087 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1088 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1089 int *cmap,*idx_p; 1090 1091 PetscFunctionBegin; 1092 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1093 mat->getrowactive = PETSC_TRUE; 1094 1095 if (!mat->rowvalues && (idx || v)) { 1096 /* 1097 allocate enough space to hold information from the longest row. 
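         (One malloc below provides max Scalars followed by max ints: mat->rowvalues points
          at the values and mat->rowindices is set to the integer tail, where max is the
          largest combined row length of the A and B blocks.)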
1098 */ 1099 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1100 int max = 1,tmp; 1101 for (i=0; i<matin->m; i++) { 1102 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1103 if (max < tmp) { max = tmp; } 1104 } 1105 ierr = PetscMalloc(max*(sizeof(int)+sizeof(Scalar)),mat->rowvalues);CHKERRQ(ierr); 1106 mat->rowindices = (int*)(mat->rowvalues + max); 1107 } 1108 1109 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1110 lrow = row - rstart; 1111 1112 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1113 if (!v) {pvA = 0; pvB = 0;} 1114 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1115 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1116 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1117 nztot = nzA + nzB; 1118 1119 cmap = mat->garray; 1120 if (v || idx) { 1121 if (nztot) { 1122 /* Sort by increasing column numbers, assuming A and B already sorted */ 1123 int imark = -1; 1124 if (v) { 1125 *v = v_p = mat->rowvalues; 1126 for (i=0; i<nzB; i++) { 1127 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1128 else break; 1129 } 1130 imark = i; 1131 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1132 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1133 } 1134 if (idx) { 1135 *idx = idx_p = mat->rowindices; 1136 if (imark > -1) { 1137 for (i=0; i<imark; i++) { 1138 idx_p[i] = cmap[cworkB[i]]; 1139 } 1140 } else { 1141 for (i=0; i<nzB; i++) { 1142 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1143 else break; 1144 } 1145 imark = i; 1146 } 1147 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1148 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1149 } 1150 } else { 1151 if (idx) *idx = 0; 1152 if (v) *v = 0; 1153 } 1154 } 1155 *nz = nztot; 1156 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1157 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1158 PetscFunctionReturn(0); 1159 } 1160 1161 #undef __FUNC__ 1162 #define __FUNC__ "MatRestoreRow_MPIAIJ" 1163 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1164 { 1165 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1166 1167 PetscFunctionBegin; 1168 if (aij->getrowactive == PETSC_FALSE) { 1169 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1170 } 1171 aij->getrowactive = PETSC_FALSE; 1172 PetscFunctionReturn(0); 1173 } 1174 1175 #undef __FUNC__ 1176 #define __FUNC__ "MatNorm_MPIAIJ" 1177 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1178 { 1179 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1180 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1181 int ierr,i,j,cstart = aij->cstart,shift = amat->indexshift; 1182 PetscReal sum = 0.0; 1183 Scalar *v; 1184 1185 PetscFunctionBegin; 1186 if (aij->size == 1) { 1187 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1188 } else { 1189 if (type == NORM_FROBENIUS) { 1190 v = amat->a; 1191 for (i=0; i<amat->nz; i++) { 1192 #if defined(PETSC_USE_COMPLEX) 1193 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1194 #else 1195 sum += (*v)*(*v); v++; 1196 #endif 1197 } 1198 v = bmat->a; 1199 for (i=0; i<bmat->nz; i++) { 1200 #if defined(PETSC_USE_COMPLEX) 1201 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1202 #else 1203 sum += (*v)*(*v); v++; 1204 #endif 1205 } 1206 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1207 *norm = sqrt(*norm); 1208 } else if (type == NORM_1) { /* max column norm */ 1209 PetscReal *tmp,*tmp2; 1210 
int *jj,*garray = aij->garray; 1211 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1212 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1213 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1214 *norm = 0.0; 1215 v = amat->a; jj = amat->j; 1216 for (j=0; j<amat->nz; j++) { 1217 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1218 } 1219 v = bmat->a; jj = bmat->j; 1220 for (j=0; j<bmat->nz; j++) { 1221 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1222 } 1223 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1224 for (j=0; j<mat->N; j++) { 1225 if (tmp2[j] > *norm) *norm = tmp2[j]; 1226 } 1227 ierr = PetscFree(tmp);CHKERRQ(ierr); 1228 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1229 } else if (type == NORM_INFINITY) { /* max row norm */ 1230 PetscReal ntemp = 0.0; 1231 for (j=0; j<aij->A->m; j++) { 1232 v = amat->a + amat->i[j] + shift; 1233 sum = 0.0; 1234 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1235 sum += PetscAbsScalar(*v); v++; 1236 } 1237 v = bmat->a + bmat->i[j] + shift; 1238 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1239 sum += PetscAbsScalar(*v); v++; 1240 } 1241 if (sum > ntemp) ntemp = sum; 1242 } 1243 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1244 } else { 1245 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1246 } 1247 } 1248 PetscFunctionReturn(0); 1249 } 1250 1251 #undef __FUNC__ 1252 #define __FUNC__ "MatTranspose_MPIAIJ" 1253 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1254 { 1255 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1256 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1257 int ierr,shift = Aloc->indexshift; 1258 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1259 Mat B; 1260 Scalar *array; 1261 1262 PetscFunctionBegin; 1263 if (!matout && M != N) { 1264 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1265 } 1266 1267 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1268 1269 /* copy over the A part */ 1270 Aloc = (Mat_SeqAIJ*)a->A->data; 1271 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1272 row = a->rstart; 1273 for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;} 1274 for (i=0; i<m; i++) { 1275 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1276 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1277 } 1278 aj = Aloc->j; 1279 for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;} 1280 1281 /* copy over the B part */ 1282 Aloc = (Mat_SeqAIJ*)a->B->data; 1283 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1284 row = a->rstart; 1285 ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr); 1286 ct = cols; 1287 for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];} 1288 for (i=0; i<m; i++) { 1289 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1290 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1291 } 1292 ierr = PetscFree(ct);CHKERRQ(ierr); 1293 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1294 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1295 if (matout) { 1296 *matout = B; 1297 } else { 1298 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1299 } 1300 PetscFunctionReturn(0); 1301 } 1302 1303 #undef __FUNC__ 1304 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1305 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1306 { 1307 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1308 Mat a = aij->A,b = aij->B; 1309 int ierr,s1,s2,s3; 
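  /*
     Sketch of the scaling performed below: the local rows form the block [A  B] and the
     result is diag(ll) * [A  B] * diag(rr). The left scaling by ll is applied to both
     blocks directly; the right scaling of A uses the local part of rr, while B is scaled
     with the ghost values of rr gathered into aij->lvec, and that scatter is overlapped
     with the left scaling.
  */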
1310 1311 PetscFunctionBegin; 1312 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1313 if (rr) { 1314 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1315 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1316 /* Overlap communication with computation. */ 1317 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1318 } 1319 if (ll) { 1320 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1321 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1322 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1323 } 1324 /* scale the diagonal block */ 1325 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1326 1327 if (rr) { 1328 /* Do a scatter end and then right scale the off-diagonal block */ 1329 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1330 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1331 } 1332 1333 PetscFunctionReturn(0); 1334 } 1335 1336 1337 #undef __FUNC__ 1338 #define __FUNC__ "MatPrintHelp_MPIAIJ" 1339 int MatPrintHelp_MPIAIJ(Mat A) 1340 { 1341 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1342 int ierr; 1343 1344 PetscFunctionBegin; 1345 if (!a->rank) { 1346 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1347 } 1348 PetscFunctionReturn(0); 1349 } 1350 1351 #undef __FUNC__ 1352 #define __FUNC__ "MatGetBlockSize_MPIAIJ" 1353 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1354 { 1355 PetscFunctionBegin; 1356 *bs = 1; 1357 PetscFunctionReturn(0); 1358 } 1359 #undef __FUNC__ 1360 #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1361 int MatSetUnfactored_MPIAIJ(Mat A) 1362 { 1363 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1364 int ierr; 1365 1366 PetscFunctionBegin; 1367 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1368 PetscFunctionReturn(0); 1369 } 1370 1371 #undef __FUNC__ 1372 #define __FUNC__ "MatEqual_MPIAIJ" 1373 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1374 { 1375 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1376 Mat a,b,c,d; 1377 PetscTruth flg; 1378 int ierr; 1379 1380 PetscFunctionBegin; 1381 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1382 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1383 a = matA->A; b = matA->B; 1384 c = matB->A; d = matB->B; 1385 1386 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1387 if (flg == PETSC_TRUE) { 1388 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1389 } 1390 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1391 PetscFunctionReturn(0); 1392 } 1393 1394 #undef __FUNC__ 1395 #define __FUNC__ "MatCopy_MPIAIJ" 1396 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1397 { 1398 int ierr; 1399 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1400 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1401 PetscTruth flg; 1402 1403 PetscFunctionBegin; 1404 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1405 if (str != SAME_NONZERO_PATTERN || !flg) { 1406 /* because of the column compression in the off-processor part of the matrix a->B, 1407 the number of columns in a->B and b->B may be different, hence we cannot call 1408 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1409 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1410 then copying the submatrices */ 1411 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1412 } else { 1413 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1414 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1415 } 1416 PetscFunctionReturn(0); 1417 } 1418 1419 #undef __FUNC__ 1420 #define __FUNC__ "MatSetUpPreallocation_MPIAIJ" 1421 int MatSetUpPreallocation_MPIAIJ(Mat A) 1422 { 1423 int ierr; 1424 1425 PetscFunctionBegin; 1426 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1427 PetscFunctionReturn(0); 1428 } 1429 1430 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1431 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1432 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1433 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **); 1434 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1435 1436 /* -------------------------------------------------------------------*/ 1437 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1438 MatGetRow_MPIAIJ, 1439 MatRestoreRow_MPIAIJ, 1440 MatMult_MPIAIJ, 1441 MatMultAdd_MPIAIJ, 1442 MatMultTranspose_MPIAIJ, 1443 MatMultTransposeAdd_MPIAIJ, 1444 0, 1445 0, 1446 0, 1447 0, 1448 0, 1449 0, 1450 MatRelax_MPIAIJ, 1451 MatTranspose_MPIAIJ, 1452 MatGetInfo_MPIAIJ, 1453 MatEqual_MPIAIJ, 1454 MatGetDiagonal_MPIAIJ, 1455 MatDiagonalScale_MPIAIJ, 1456 MatNorm_MPIAIJ, 1457 MatAssemblyBegin_MPIAIJ, 1458 MatAssemblyEnd_MPIAIJ, 1459 0, 1460 MatSetOption_MPIAIJ, 1461 MatZeroEntries_MPIAIJ, 1462 MatZeroRows_MPIAIJ, 1463 0, 1464 0, 1465 0, 1466 0, 1467 MatSetUpPreallocation_MPIAIJ, 1468 0, 1469 MatGetOwnershipRange_MPIAIJ, 1470 0, 1471 0, 1472 0, 1473 0, 1474 MatDuplicate_MPIAIJ, 1475 0, 1476 0, 1477 0, 1478 0, 1479 0, 1480 MatGetSubMatrices_MPIAIJ, 1481 MatIncreaseOverlap_MPIAIJ, 1482 MatGetValues_MPIAIJ, 1483 MatCopy_MPIAIJ, 1484 MatPrintHelp_MPIAIJ, 1485 MatScale_MPIAIJ, 1486 0, 1487 0, 1488 0, 1489 MatGetBlockSize_MPIAIJ, 1490 0, 1491 0, 1492 0, 1493 0, 1494 MatFDColoringCreate_MPIAIJ, 1495 0, 1496 MatSetUnfactored_MPIAIJ, 1497 0, 1498 0, 1499 MatGetSubMatrix_MPIAIJ, 1500 MatDestroy_MPIAIJ, 1501 MatView_MPIAIJ, 1502 MatGetMaps_Petsc}; 1503 1504 /* ----------------------------------------------------------------------------------------*/ 1505 1506 EXTERN_C_BEGIN 1507 #undef __FUNC__ 1508 #define __FUNC__ "MatStoreValues_MPIAIJ" 1509 int MatStoreValues_MPIAIJ(Mat mat) 1510 { 1511 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1512 int ierr; 1513 1514 PetscFunctionBegin; 1515 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1516 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1517 PetscFunctionReturn(0); 1518 } 1519 EXTERN_C_END 1520 1521 EXTERN_C_BEGIN 1522 #undef __FUNC__ 1523 #define __FUNC__ "MatRetrieveValues_MPIAIJ" 1524 int MatRetrieveValues_MPIAIJ(Mat mat) 1525 { 1526 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1527 int ierr; 1528 1529 PetscFunctionBegin; 1530 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1531 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1532 PetscFunctionReturn(0); 1533 } 1534 EXTERN_C_END 1535 1536 #include "petscpc.h" 1537 EXTERN_C_BEGIN 1538 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1539 EXTERN_C_END 1540 1541 EXTERN int MatUseXXT_MPIAIJ(Mat); 1542 EXTERN int MatUseXYT_MPIAIJ(Mat); 1543 1544 EXTERN_C_BEGIN 1545 #undef __FUNC__ 1546 #define __FUNC__ "MatCreate_MPIAIJ" 1547 int 
MatCreate_MPIAIJ(Mat B) 1548 { 1549 Mat_MPIAIJ *b; 1550 int ierr,i,size; 1551 PetscTruth flg; 1552 1553 PetscFunctionBegin; 1554 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1555 1556 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1557 B->data = (void*)b; 1558 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1559 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1560 B->factor = 0; 1561 B->assembled = PETSC_FALSE; 1562 B->mapping = 0; 1563 1564 B->insertmode = NOT_SET_VALUES; 1565 b->size = size; 1566 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1567 1568 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1569 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1570 1571 /* the information in the maps duplicates the information computed below, eventually 1572 we should remove the duplicate information that is not contained in the maps */ 1573 ierr = MapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1574 ierr = MapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1575 1576 /* build local table of row and column ownerships */ 1577 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1578 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1579 b->cowners = b->rowners + b->size + 2; 1580 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1581 b->rowners[0] = 0; 1582 for (i=2; i<=b->size; i++) { 1583 b->rowners[i] += b->rowners[i-1]; 1584 } 1585 b->rstart = b->rowners[b->rank]; 1586 b->rend = b->rowners[b->rank+1]; 1587 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1588 b->cowners[0] = 0; 1589 for (i=2; i<=b->size; i++) { 1590 b->cowners[i] += b->cowners[i-1]; 1591 } 1592 b->cstart = b->cowners[b->rank]; 1593 b->cend = b->cowners[b->rank+1]; 1594 1595 /* build cache for off array entries formed */ 1596 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1597 b->donotstash = PETSC_FALSE; 1598 b->colmap = 0; 1599 b->garray = 0; 1600 b->roworiented = PETSC_TRUE; 1601 1602 /* stuff used for matrix vector multiply */ 1603 b->lvec = PETSC_NULL; 1604 b->Mvctx = PETSC_NULL; 1605 1606 /* stuff for MatGetRow() */ 1607 b->rowindices = 0; 1608 b->rowvalues = 0; 1609 b->getrowactive = PETSC_FALSE; 1610 1611 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_mpiaij_xxt",&flg);CHKERRQ(ierr); 1612 if (flg) { ierr = MatUseXXT_MPIAIJ(B);CHKERRQ(ierr); } 1613 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_mpiaij_xyt",&flg);CHKERRQ(ierr); 1614 if (flg) { ierr = MatUseXYT_MPIAIJ(B);CHKERRQ(ierr); } 1615 1616 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1617 "MatStoreValues_MPIAIJ", 1618 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1619 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1620 "MatRetrieveValues_MPIAIJ", 1621 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1622 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1623 "MatGetDiagonalBlock_MPIAIJ", 1624 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1625 PetscFunctionReturn(0); 1626 } 1627 EXTERN_C_END 1628 1629 #undef __FUNC__ 1630 #define __FUNC__ "MatDuplicate_MPIAIJ" 1631 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1632 { 1633 Mat mat; 1634 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1635 int ierr,len = 0; 1636 1637 PetscFunctionBegin; 1638 *newmat = 0; 1639 ierr = 
MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1640 ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1641 a = (Mat_MPIAIJ*)mat->data; 1642 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1643 mat->factor = matin->factor; 1644 mat->assembled = PETSC_TRUE; 1645 mat->insertmode = NOT_SET_VALUES; 1646 mat->preallocated = PETSC_TRUE; 1647 1648 a->rstart = oldmat->rstart; 1649 a->rend = oldmat->rend; 1650 a->cstart = oldmat->cstart; 1651 a->cend = oldmat->cend; 1652 a->size = oldmat->size; 1653 a->rank = oldmat->rank; 1654 a->donotstash = oldmat->donotstash; 1655 a->roworiented = oldmat->roworiented; 1656 a->rowindices = 0; 1657 a->rowvalues = 0; 1658 a->getrowactive = PETSC_FALSE; 1659 1660 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1661 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1662 if (oldmat->colmap) { 1663 #if defined (PETSC_USE_CTABLE) 1664 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1665 #else 1666 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1667 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1668 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1669 #endif 1670 } else a->colmap = 0; 1671 if (oldmat->garray) { 1672 len = oldmat->B->n; 1673 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1674 PetscLogObjectMemory(mat,len*sizeof(int)); 1675 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1676 } else a->garray = 0; 1677 1678 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1679 PetscLogObjectParent(mat,a->lvec); 1680 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1681 PetscLogObjectParent(mat,a->Mvctx); 1682 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1683 PetscLogObjectParent(mat,a->A); 1684 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1685 PetscLogObjectParent(mat,a->B); 1686 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1687 *newmat = mat; 1688 PetscFunctionReturn(0); 1689 } 1690 1691 #include "petscsys.h" 1692 1693 EXTERN_C_BEGIN 1694 #undef __FUNC__ 1695 #define __FUNC__ "MatLoad_MPIAIJ" 1696 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat) 1697 { 1698 Mat A; 1699 Scalar *vals,*svals; 1700 MPI_Comm comm = ((PetscObject)viewer)->comm; 1701 MPI_Status status; 1702 int i,nz,ierr,j,rstart,rend,fd; 1703 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1704 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1705 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1706 1707 PetscFunctionBegin; 1708 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1709 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1710 if (!rank) { 1711 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1712 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1713 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1714 if (header[3] < 0) { 1715 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1716 } 1717 } 1718 1719 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1720 M = header[1]; N = header[2]; 1721 /* determine ownership of all rows */ 1722 m = M/size + ((M % size) > rank); 1723 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1724 ierr = 
MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1725 rowners[0] = 0; 1726 for (i=2; i<=size; i++) { 1727 rowners[i] += rowners[i-1]; 1728 } 1729 rstart = rowners[rank]; 1730 rend = rowners[rank+1]; 1731 1732 /* distribute row lengths to all processors */ 1733 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1734 offlens = ourlens + (rend-rstart); 1735 if (!rank) { 1736 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1737 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1738 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1739 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1740 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1741 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1742 } else { 1743 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1744 } 1745 1746 if (!rank) { 1747 /* calculate the number of nonzeros on each processor */ 1748 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1749 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1750 for (i=0; i<size; i++) { 1751 for (j=rowners[i]; j< rowners[i+1]; j++) { 1752 procsnz[i] += rowlengths[j]; 1753 } 1754 } 1755 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1756 1757 /* determine max buffer needed and allocate it */ 1758 maxnz = 0; 1759 for (i=0; i<size; i++) { 1760 maxnz = PetscMax(maxnz,procsnz[i]); 1761 } 1762 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1763 1764 /* read in my part of the matrix column indices */ 1765 nz = procsnz[0]; 1766 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1767 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1768 1769 /* read in everyone else's and ship them off */ 1770 for (i=1; i<size; i++) { 1771 nz = procsnz[i]; 1772 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1773 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1774 } 1775 ierr = PetscFree(cols);CHKERRQ(ierr); 1776 } else { 1777 /* determine buffer space needed for message */ 1778 nz = 0; 1779 for (i=0; i<m; i++) { 1780 nz += ourlens[i]; 1781 } 1782 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 1783 1784 /* receive message of column indices */ 1785 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1786 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1787 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1788 } 1789 1790 /* determine column ownership if matrix is not square */ 1791 if (N != M) { 1792 n = N/size + ((N % size) > rank); 1793 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1794 cstart = cend - n; 1795 } else { 1796 cstart = rstart; 1797 cend = rend; 1798 n = cend - cstart; 1799 } 1800 1801 /* loop over local rows, determining number of off diagonal entries */ 1802 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 1803 jj = 0; 1804 for (i=0; i<m; i++) { 1805 for (j=0; j<ourlens[i]; j++) { 1806 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1807 jj++; 1808 } 1809 } 1810 1811 /* create our matrix */ 1812 for (i=0; i<m; i++) { 1813 ourlens[i] -= offlens[i]; 1814 } 1815 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1816 A = *newmat; 1817 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1818 for (i=0; i<m; i++) { 1819 ourlens[i] += offlens[i]; 1820 } 1821 1822 if (!rank) { 1823 ierr = 
PetscMalloc(maxnz*sizeof(Scalar),&vals);CHKERRQ(ierr); 1824 1825 /* read in my part of the matrix numerical values */ 1826 nz = procsnz[0]; 1827 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1828 1829 /* insert into matrix */ 1830 jj = rstart; 1831 smycols = mycols; 1832 svals = vals; 1833 for (i=0; i<m; i++) { 1834 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1835 smycols += ourlens[i]; 1836 svals += ourlens[i]; 1837 jj++; 1838 } 1839 1840 /* read in other processors and ship out */ 1841 for (i=1; i<size; i++) { 1842 nz = procsnz[i]; 1843 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1844 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 1845 } 1846 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1847 } else { 1848 /* receive numeric values */ 1849 ierr = PetscMalloc((nz+1)*sizeof(Scalar),&vals);CHKERRQ(ierr); 1850 1851 /* receive message of values */ 1852 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 1853 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1854 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1855 1856 /* insert into matrix */ 1857 jj = rstart; 1858 smycols = mycols; 1859 svals = vals; 1860 for (i=0; i<m; i++) { 1861 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1862 smycols += ourlens[i]; 1863 svals += ourlens[i]; 1864 jj++; 1865 } 1866 } 1867 ierr = PetscFree(ourlens);CHKERRQ(ierr); 1868 ierr = PetscFree(vals);CHKERRQ(ierr); 1869 ierr = PetscFree(mycols);CHKERRQ(ierr); 1870 ierr = PetscFree(rowners);CHKERRQ(ierr); 1871 1872 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1873 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1874 PetscFunctionReturn(0); 1875 } 1876 EXTERN_C_END 1877 1878 #undef __FUNC__ 1879 #define __FUNC__ "MatGetSubMatrix_MPIAIJ" 1880 /* 1881 Not great since it makes two copies of the submatrix: first a local SeqAIJ 1882 matrix on each processor, and then the final result obtained by concatenating those local matrices. 
1883 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 1884 */ 1885 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 1886 { 1887 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 1888 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend; 1889 Mat *local,M,Mreuse; 1890 Scalar *vwork,*aa; 1891 MPI_Comm comm = mat->comm; 1892 Mat_SeqAIJ *aij; 1893 1894 1895 PetscFunctionBegin; 1896 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1897 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1898 1899 if (call == MAT_REUSE_MATRIX) { 1900 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 1901 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 1902 local = &Mreuse; 1903 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 1904 } else { 1905 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1906 Mreuse = *local; 1907 ierr = PetscFree(local);CHKERRQ(ierr); 1908 } 1909 1910 /* 1911 m - number of local rows 1912 n - number of columns (same on all processors) 1913 rstart - first row in new global matrix generated 1914 */ 1915 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1916 if (call == MAT_INITIAL_MATRIX) { 1917 aij = (Mat_SeqAIJ*)(Mreuse)->data; 1918 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 1919 ii = aij->i; 1920 jj = aij->j; 1921 1922 /* 1923 Determine the number of non-zeros in the diagonal and off-diagonal 1924 portions of the matrix in order to do correct preallocation 1925 */ 1926 1927 /* first get start and end of "diagonal" columns */ 1928 if (csize == PETSC_DECIDE) { 1929 nlocal = n/size + ((n % size) > rank); 1930 } else { 1931 nlocal = csize; 1932 } 1933 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1934 rstart = rend - nlocal; 1935 if (rank == size - 1 && rend != n) { 1936 SETERRQ(1,"Local column sizes do not add up to total number of columns"); 1937 } 1938 1939 /* next, compute all the lengths */ 1940 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 1941 olens = dlens + m; 1942 for (i=0; i<m; i++) { 1943 jend = ii[i+1] - ii[i]; 1944 olen = 0; 1945 dlen = 0; 1946 for (j=0; j<jend; j++) { 1947 if (*jj < rstart || *jj >= rend) olen++; 1948 else dlen++; 1949 jj++; 1950 } 1951 olens[i] = olen; 1952 dlens[i] = dlen; 1953 } 1954 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 1955 ierr = PetscFree(dlens);CHKERRQ(ierr); 1956 } else { 1957 int ml,nl; 1958 1959 M = *newmat; 1960 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 1961 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 1962 ierr = MatZeroEntries(M);CHKERRQ(ierr); 1963 /* 1964 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 1965 rather than the slower MatSetValues(). 
1966 */ 1967 M->was_assembled = PETSC_TRUE; 1968 M->assembled = PETSC_FALSE; 1969 } 1970 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 1971 aij = (Mat_SeqAIJ*)(Mreuse)->data; 1972 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 1973 ii = aij->i; 1974 jj = aij->j; 1975 aa = aij->a; 1976 for (i=0; i<m; i++) { 1977 row = rstart + i; 1978 nz = ii[i+1] - ii[i]; 1979 cwork = jj; jj += nz; 1980 vwork = aa; aa += nz; 1981 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 1982 } 1983 1984 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1985 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1986 *newmat = M; 1987 1988 /* save submatrix used in processor for next request */ 1989 if (call == MAT_INITIAL_MATRIX) { 1990 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 1991 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 1992 } 1993 1994 PetscFunctionReturn(0); 1995 } 1996 1997 #undef __FUNC__ 1998 #define __FUNC__ "MatMPIAIJSetPreallocation" 1999 /*@C 2000 MatMPIAIJSetPreallocation - Preallocates storage for a sparse parallel matrix in AIJ format 2001 (the default parallel PETSc format). For good matrix assembly performance 2002 the user should preallocate the matrix storage by setting the parameters 2003 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2004 performance can be increased by more than a factor of 50. 2005 2006 Collective on MPI_Comm 2007 2008 Input Parameters: 2009 + B - the matrix 2010 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2011 (same value is used for all local rows) 2012 . d_nnz - array containing the number of nonzeros in the various rows of the 2013 DIAGONAL portion of the local submatrix (possibly different for each row) 2014 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2015 The size of this array is equal to the number of local rows, i.e 'm'. 2016 You must leave room for the diagonal entry even if it is zero. 2017 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2018 submatrix (same value is used for all local rows). 2019 - o_nnz - array containing the number of nonzeros in the various rows of the 2020 OFF-DIAGONAL portion of the local submatrix (possibly different for 2021 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2022 structure. The size of this array is equal to the number 2023 of local rows, i.e 'm'. 2024 2025 The AIJ format (also called the Yale sparse matrix format or 2026 compressed row storage) is fully compatible with standard Fortran 77 2027 storage. That is, the stored row and column indices can begin at 2028 either one (as in Fortran) or zero. See the users manual for details. 2029 2030 The user MUST specify either the local or global matrix dimensions 2031 (possibly both). 2032 2033 The parallel matrix is partitioned such that the first m0 rows belong to 2034 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2035 to process 2 etc., where m0,m1,m2... are the input parameter 'm'. 2036 2037 The DIAGONAL portion of the local submatrix of a processor can be defined 2038 as the submatrix which is obtained by extracting the part corresponding 2039 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2040 first row that belongs to the processor, and r2 is the last row belonging 2041 to this processor. This is a square mxm matrix.
The remaining portion 2042 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2043 2044 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 2045 2046 By default, this format uses inodes (identical nodes) when possible. 2047 We search for consecutive rows with the same nonzero structure, thereby 2048 reusing matrix information to achieve increased efficiency. 2049 2050 Options Database Keys: 2051 + -mat_aij_no_inode - Do not use inodes 2052 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2053 - -mat_aij_oneindex - Internally use indexing starting at 1 2054 rather than 0. Note that when calling MatSetValues(), 2055 the user still MUST index entries starting at 0! 2056 2057 Example usage: 2058 2059 Consider the following 8x8 matrix with 34 non-zero values, which is 2060 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2061 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2062 as follows: 2063 2064 .vb 2065 1 2 0 | 0 3 0 | 0 4 2066 Proc0 0 5 6 | 7 0 0 | 8 0 2067 9 0 10 | 11 0 0 | 12 0 2068 ------------------------------------- 2069 13 0 14 | 15 16 17 | 0 0 2070 Proc1 0 18 0 | 19 20 21 | 0 0 2071 0 0 0 | 22 23 0 | 24 0 2072 ------------------------------------- 2073 Proc2 25 26 27 | 0 0 28 | 29 0 2074 30 0 0 | 31 32 33 | 0 34 2075 .ve 2076 2077 This can be represented as a collection of submatrices as: 2078 2079 .vb 2080 A B C 2081 D E F 2082 G H I 2083 .ve 2084 2085 Here the submatrices A,B,C are owned by proc0, D,E,F are 2086 owned by proc1, and G,H,I are owned by proc2. 2087 2088 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2089 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2090 The 'M','N' parameters are 8,8, and have the same values on all procs. 2091 2092 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2093 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2094 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2095 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2096 part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ 2097 matrix, and [DF] as another SeqAIJ matrix. 2098 2099 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2100 allocated for every row of the local diagonal submatrix, and o_nz 2101 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2102 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2103 local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices. 2104 In this case, the values of d_nz,o_nz are: 2105 .vb 2106 proc0 : d_nz = 2, o_nz = 2 2107 proc1 : d_nz = 3, o_nz = 2 2108 proc2 : d_nz = 1, o_nz = 4 2109 .ve 2110 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2111 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2112 for proc2, i.e we are using 12+15+10=37 storage locations to store 2113 34 values. 2114 2115 When the d_nnz, o_nnz parameters are specified, the storage is specified 2116 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2117 In the above case the values for d_nnz,o_nnz are: 2118 .vb 2119 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2120 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2121 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2122 .ve 2123 Here the space allocated is the sum of all the above values, i.e 34, and 2124 hence pre-allocation is perfect. 
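   For illustration only (this calling sequence is an added sketch, not part of the
   original manual page; it assumes a communicator comm, a Mat A, and an int ierr are
   already declared), proc1 in the example above could preallocate its three local
   rows with the d_nnz/o_nnz values listed:

.vb
   int d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};

   ierr = MatCreate(comm,3,3,8,8,&A);CHKERRQ(ierr);                    /* m=3,n=3,M=8,N=8 on proc1 */
   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);  /* d_nz,o_nz are ignored since the arrays are given */
.ve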
2125 2126 Level: intermediate 2127 2128 .keywords: matrix, aij, compressed row, sparse, parallel 2129 2130 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2131 @*/ 2132 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 2133 { 2134 Mat_MPIAIJ *b; 2135 int ierr,i; 2136 PetscTruth flg2; 2137 2138 PetscFunctionBegin; 2139 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr); 2140 if (!flg2) PetscFunctionReturn(0); 2141 B->preallocated = PETSC_TRUE; 2142 if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than -2: value %d",d_nz); 2143 if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than -2: value %d",o_nz); 2144 if (d_nnz) { 2145 for (i=0; i<B->m; i++) { 2146 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 2147 } 2148 } 2149 if (o_nnz) { 2150 for (i=0; i<B->m; i++) { 2151 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 2152 } 2153 } 2154 b = (Mat_MPIAIJ*)B->data; 2155 2156 if (d_nz == PETSC_DEFAULT) d_nz = 5; 2157 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2158 PetscLogObjectParent(B,b->A); 2159 if (o_nz == PETSC_DEFAULT) o_nz = 0; 2160 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2161 PetscLogObjectParent(B,b->B); 2162 2163 PetscFunctionReturn(0); 2164 } 2165 2166 #undef __FUNC__ 2167 #define __FUNC__ "MatCreateMPIAIJ" 2168 /*@C 2169 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2170 (the default parallel PETSc format). For good matrix assembly performance 2171 the user should preallocate the matrix storage by setting the parameters 2172 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2173 performance can be increased by more than a factor of 50. 2174 2175 Collective on MPI_Comm 2176 2177 Input Parameters: 2178 + comm - MPI communicator 2179 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2180 This value should be the same as the local size used in creating the 2181 y vector for the matrix-vector product y = Ax. 2182 . n - This value should be the same as the local size used in creating the 2183 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2184 calculated if N is given) For square matrices n is almost always m. 2185 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2186 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2187 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2188 (same value is used for all local rows) 2189 . d_nnz - array containing the number of nonzeros in the various rows of the 2190 DIAGONAL portion of the local submatrix (possibly different for each row) 2191 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2192 The size of this array is equal to the number of local rows, i.e 'm'. 2193 You must leave room for the diagonal entry even if it is zero. 2194 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2195 submatrix (same value is used for all local rows). 2196 - o_nnz - array containing the number of nonzeros in the various rows of the 2197 OFF-DIAGONAL portion of the local submatrix (possibly different for 2198 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2199 structure. 
The size of this array is equal to the number 2200 of local rows, i.e 'm'. 2201 2202 Output Parameter: 2203 . A - the matrix 2204 2205 Notes: 2206 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 2207 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2208 storage requirements for this matrix. 2209 2210 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2211 processor then it must be used on all processors that share the object for 2212 that argument. 2213 2214 The AIJ format (also called the Yale sparse matrix format or 2215 compressed row storage) is fully compatible with standard Fortran 77 2216 storage. That is, the stored row and column indices can begin at 2217 either one (as in Fortran) or zero. See the users manual for details. 2218 2219 The user MUST specify either the local or global matrix dimensions 2220 (possibly both). 2221 2222 The parallel matrix is partitioned such that the first m0 rows belong to 2223 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2224 to process 2 etc., where m0,m1,m2... are the input parameter 'm'. 2225 2226 The DIAGONAL portion of the local submatrix of a processor can be defined 2227 as the submatrix which is obtained by extracting the part corresponding 2228 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2229 first row that belongs to the processor, and r2 is the last row belonging 2230 to this processor. This is a square mxm matrix. The remaining portion 2231 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2232 2233 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 2234 2235 By default, this format uses inodes (identical nodes) when possible. 2236 We search for consecutive rows with the same nonzero structure, thereby 2237 reusing matrix information to achieve increased efficiency. 2238 2239 Options Database Keys: 2240 + -mat_aij_no_inode - Do not use inodes 2241 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2242 - -mat_aij_oneindex - Internally use indexing starting at 1 2243 rather than 0. Note that when calling MatSetValues(), 2244 the user still MUST index entries starting at 0! 2245 2246 2247 Example usage: 2248 2249 Consider the following 8x8 matrix with 34 non-zero values, which is 2250 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2251 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2252 as follows: 2253 2254 .vb 2255 1 2 0 | 0 3 0 | 0 4 2256 Proc0 0 5 6 | 7 0 0 | 8 0 2257 9 0 10 | 11 0 0 | 12 0 2258 ------------------------------------- 2259 13 0 14 | 15 16 17 | 0 0 2260 Proc1 0 18 0 | 19 20 21 | 0 0 2261 0 0 0 | 22 23 0 | 24 0 2262 ------------------------------------- 2263 Proc2 25 26 27 | 0 0 28 | 29 0 2264 30 0 0 | 31 32 33 | 0 34 2265 .ve 2266 2267 This can be represented as a collection of submatrices as: 2268 2269 .vb 2270 A B C 2271 D E F 2272 G H I 2273 .ve 2274 2275 Here the submatrices A,B,C are owned by proc0, D,E,F are 2276 owned by proc1, and G,H,I are owned by proc2. 2277 2278 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2279 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2280 The 'M','N' parameters are 8,8, and have the same values on all procs. 2281 2282 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2283 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2284 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2285 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2286 part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ 2287 matrix, and [DF] as another SeqAIJ matrix. 2288 2289 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2290 allocated for every row of the local diagonal submatrix, and o_nz 2291 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2292 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2293 local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices. 2294 In this case, the values of d_nz,o_nz are: 2295 .vb 2296 proc0 : d_nz = 2, o_nz = 2 2297 proc1 : d_nz = 3, o_nz = 2 2298 proc2 : d_nz = 1, o_nz = 4 2299 .ve 2300 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2301 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2302 for proc2, i.e we are using 12+15+10=37 storage locations to store 2303 34 values. 2304 2305 When the d_nnz, o_nnz parameters are specified, the storage is specified 2306 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2307 In the above case the values for d_nnz,o_nnz are: 2308 .vb 2309 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2310 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2311 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2312 .ve 2313 Here the space allocated is the sum of all the above values, i.e 34, and 2314 hence pre-allocation is perfect. 2315 2316 Level: intermediate 2317 2318 .keywords: matrix, aij, compressed row, sparse, parallel 2319 2320 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2321 @*/ 2322 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2323 { 2324 int ierr,size; 2325 2326 PetscFunctionBegin; 2327 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2328 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2329 if (size > 1) { 2330 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2331 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2332 } else { 2333 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2334 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2335 } 2336 PetscFunctionReturn(0); 2337 } 2338
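
/*
   Example usage (for illustration only; this comment block is an addition, not part of
   the original source, and sketches the calling sequence documented above assuming the
   PETSc 2.x routines used elsewhere in this file). Each process lets PETSc choose its
   local sizes, inserts its locally owned rows, and then assembles the matrix:

     Mat    A;
     Scalar v = 1.0;
     int    ierr,i,rstart,rend,M = 8,N = 8;

     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       // put a 1.0 on the diagonal of each locally owned row
       ierr = MatSetValues(A,1,&i,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/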