1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 #if defined(PETSC_HAVE_ELEMENTAL) 8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 9 #endif 10 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 11 { 12 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 13 PetscErrorCode ierr; 14 15 PetscFunctionBegin; 16 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 17 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 18 PetscFunctionReturn(0); 19 } 20 21 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 22 { 23 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 24 PetscErrorCode ierr; 25 26 PetscFunctionBegin; 27 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 28 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 29 PetscFunctionReturn(0); 30 } 31 32 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 33 { \ 34 \ 35 brow = row/bs; \ 36 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 37 rmax = aimax[brow]; nrow = ailen[brow]; \ 38 bcol = col/bs; \ 39 ridx = row % bs; cidx = col % bs; \ 40 low = 0; high = nrow; \ 41 while (high-low > 3) { \ 42 t = (low+high)/2; \ 43 if (rp[t] > bcol) high = t; \ 44 else low = t; \ 45 } \ 46 for (_i=low; _i<high; _i++) { \ 47 if (rp[_i] > bcol) break; \ 48 if (rp[_i] == bcol) { \ 49 bap = ap + bs2*_i + bs*cidx + ridx; \ 50 if (addv == ADD_VALUES) *bap += value; \ 51 else *bap = value; \ 52 goto a_noinsert; \ 53 } \ 54 } \ 55 if (a->nonew == 1) goto a_noinsert; \ 56 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 57 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 58 N = nrow++ - 1; \ 59 /* shift up all the later entries in this row */ \ 60 for (ii=N; ii>=_i; ii--) { \ 61 rp[ii+1] = rp[ii]; \ 62 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 63 } \ 64 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 65 rp[_i] = bcol; \ 66 ap[bs2*_i + bs*cidx + ridx] = value; \ 67 A->nonzerostate++;\ 68 a_noinsert:; \ 69 ailen[brow] = nrow; \ 70 } 71 72 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 73 { \ 74 brow = row/bs; \ 75 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 76 rmax = bimax[brow]; nrow = bilen[brow]; \ 77 bcol = col/bs; \ 78 ridx = row % bs; cidx = col % bs; \ 79 low = 0; high = nrow; \ 80 while (high-low > 3) { \ 81 t = (low+high)/2; \ 82 if (rp[t] > bcol) high = t; \ 83 else low = t; \ 84 } \ 85 for (_i=low; _i<high; _i++) { \ 86 if (rp[_i] > bcol) break; \ 87 if (rp[_i] == bcol) { \ 88 bap = ap + bs2*_i + bs*cidx + ridx; \ 89 if (addv == ADD_VALUES) *bap += value; \ 90 else *bap = value; \ 91 goto b_noinsert; \ 92 } \ 93 } \ 94 if (b->nonew == 1) goto b_noinsert; \ 95 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 96 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 97 N = nrow++ - 1; \ 98 /* shift up all the later entries in this row */ \ 99 for (ii=N; ii>=_i; ii--) { \ 100 rp[ii+1] = rp[ii]; \ 101 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 102 } \ 103 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 104 rp[_i] = bcol; \ 105 ap[bs2*_i + bs*cidx + ridx] = value; \ 106 B->nonzerostate++;\ 107 b_noinsert:; \ 108 bilen[brow] = nrow; \ 109 } 110 111 /* Only add/insert a(i,j) with i<=j (blocks). 112 Any a(i,j) with i>j input by user is ingored. 113 */ 114 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 115 { 116 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 117 MatScalar value; 118 PetscBool roworiented = baij->roworiented; 119 PetscErrorCode ierr; 120 PetscInt i,j,row,col; 121 PetscInt rstart_orig=mat->rmap->rstart; 122 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 123 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 124 125 /* Some Variables required in the macro */ 126 Mat A = baij->A; 127 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 128 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 129 MatScalar *aa =a->a; 130 131 Mat B = baij->B; 132 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 133 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 134 MatScalar *ba =b->a; 135 136 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 137 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 138 MatScalar *ap,*bap; 139 140 /* for stash */ 141 PetscInt n_loc, *in_loc = NULL; 142 MatScalar *v_loc = NULL; 143 144 PetscFunctionBegin; 145 if (!baij->donotstash) { 146 if (n > baij->n_loc) { 147 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 148 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 149 ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr); 150 ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr); 151 152 baij->n_loc = n; 153 } 154 in_loc = baij->in_loc; 155 v_loc = baij->v_loc; 156 } 157 158 for (i=0; i<m; i++) { 159 if (im[i] < 0) continue; 160 #if defined(PETSC_USE_DEBUG) 161 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 162 #endif 163 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 164 row = im[i] - rstart_orig; /* local row index */ 165 for (j=0; j<n; j++) { 166 if (im[i]/bs > in[j]/bs) { 167 if (a->ignore_ltriangular) { 168 continue; /* ignore lower triangular blocks */ 169 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 170 } 171 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 172 col = in[j] - cstart_orig; /* local col index */ 173 brow = row/bs; bcol = col/bs; 174 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 175 if (roworiented) value = v[i*n+j]; 176 else value = v[i+j*m]; 177 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 178 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 179 } else if (in[j] < 0) continue; 180 #if defined(PETSC_USE_DEBUG) 181 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 182 #endif 183 else { /* off-diag entry (B) */ 184 if (mat->was_assembled) { 185 if (!baij->colmap) { 186 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 187 } 188 #if defined(PETSC_USE_CTABLE) 189 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 190 col = col - 1; 191 #else 192 col = baij->colmap[in[j]/bs] - 1; 193 #endif 194 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 195 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 196 col = in[j]; 197 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 198 B = baij->B; 199 b = (Mat_SeqBAIJ*)(B)->data; 200 bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 201 ba = b->a; 202 } else col += in[j]%bs; 203 } else col = in[j]; 204 if (roworiented) value = v[i*n+j]; 205 else value = v[i+j*m]; 206 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 207 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 208 } 209 } 210 } else { /* off processor entry */ 211 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 212 if (!baij->donotstash) { 213 mat->assembled = PETSC_FALSE; 214 n_loc = 0; 215 for (j=0; j<n; j++) { 216 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 217 in_loc[n_loc] = in[j]; 218 if (roworiented) { 219 v_loc[n_loc] = v[i*n+j]; 220 } else { 221 v_loc[n_loc] = v[j*m+i]; 222 } 223 n_loc++; 224 } 225 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 226 } 227 } 228 } 229 PetscFunctionReturn(0); 230 } 231 232 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 233 { 234 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 235 PetscErrorCode ierr; 236 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 237 PetscInt *imax =a->imax,*ai=a->i,*ailen=a->ilen; 238 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 239 PetscBool roworiented=a->roworiented; 240 const PetscScalar *value = v; 241 MatScalar *ap,*aa = a->a,*bap; 242 243 PetscFunctionBegin; 244 if (col < row) { 245 if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */ 246 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 247 } 248 rp = aj + ai[row]; 249 ap = aa + bs2*ai[row]; 250 rmax = imax[row]; 251 nrow = ailen[row]; 252 value = v; 253 low = 0; 254 high = nrow; 255 256 while (high-low > 7) { 257 t = (low+high)/2; 258 if (rp[t] > col) high = t; 259 else low = t; 260 } 261 for (i=low; i<high; i++) { 262 if (rp[i] > col) break; 263 if (rp[i] == col) { 264 bap = ap + bs2*i; 265 if (roworiented) { 266 if (is == ADD_VALUES) { 267 for (ii=0; ii<bs; ii++) { 268 for (jj=ii; jj<bs2; jj+=bs) { 269 bap[jj] += *value++; 270 } 271 } 272 } else { 273 for (ii=0; ii<bs; ii++) { 274 for (jj=ii; jj<bs2; jj+=bs) { 275 bap[jj] = *value++; 276 } 277 } 278 } 279 } else { 280 if (is == ADD_VALUES) { 281 for (ii=0; ii<bs; ii++) { 282 for (jj=0; jj<bs; jj++) { 283 *bap++ += *value++; 284 } 285 } 286 } else { 287 for (ii=0; ii<bs; ii++) { 288 for (jj=0; jj<bs; jj++) { 289 *bap++ = *value++; 290 } 291 } 292 } 293 } 294 goto noinsert2; 295 } 296 } 297 if (nonew == 1) goto noinsert2; 298 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol); 299 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 300 N = nrow++ - 1; high++; 301 /* shift up all the later entries in this row */ 302 for (ii=N; ii>=i; ii--) { 303 rp[ii+1] = rp[ii]; 304 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 305 } 306 if (N >= i) { 307 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 308 } 309 rp[i] = col; 310 bap = ap + bs2*i; 311 if (roworiented) { 312 for (ii=0; ii<bs; ii++) { 313 for (jj=ii; jj<bs2; jj+=bs) { 314 bap[jj] = *value++; 315 } 316 } 317 } else { 318 for (ii=0; ii<bs; ii++) { 319 for (jj=0; jj<bs; jj++) { 320 *bap++ = *value++; 321 } 322 } 323 } 324 noinsert2:; 325 ailen[row] = nrow; 326 PetscFunctionReturn(0); 327 } 328 329 /* 330 This routine is exactly duplicated in mpibaij.c 331 */ 332 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 333 { 334 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 335 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 336 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 337 PetscErrorCode ierr; 338 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 339 PetscBool roworiented=a->roworiented; 340 const PetscScalar *value = v; 341 MatScalar *ap,*aa = a->a,*bap; 342 343 PetscFunctionBegin; 344 rp = aj + ai[row]; 345 ap = aa + bs2*ai[row]; 346 rmax = imax[row]; 347 nrow = ailen[row]; 348 low = 0; 349 high = nrow; 350 value = v; 351 while (high-low > 7) { 352 t = (low+high)/2; 353 if (rp[t] > col) high = t; 354 else low = t; 355 } 356 for (i=low; i<high; i++) { 357 if (rp[i] > col) break; 358 if (rp[i] == col) { 359 bap = ap + bs2*i; 360 if (roworiented) { 361 if (is == ADD_VALUES) { 362 for (ii=0; ii<bs; ii++) { 363 for (jj=ii; jj<bs2; jj+=bs) { 364 bap[jj] += *value++; 365 } 366 } 367 } else { 368 for (ii=0; ii<bs; ii++) { 369 for (jj=ii; jj<bs2; jj+=bs) { 370 bap[jj] = *value++; 371 } 372 } 373 } 374 } else { 375 if (is == ADD_VALUES) { 376 for (ii=0; ii<bs; ii++,value+=bs) { 377 for (jj=0; jj<bs; jj++) { 378 bap[jj] += value[jj]; 379 } 380 bap += bs; 381 } 382 } else { 383 for (ii=0; ii<bs; ii++,value+=bs) { 384 for (jj=0; jj<bs; jj++) { 385 bap[jj] = value[jj]; 386 } 387 bap += bs; 388 } 389 } 390 } 391 goto noinsert2; 392 } 393 } 394 if (nonew == 1) goto noinsert2; 395 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol); 396 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 397 N = nrow++ - 1; high++; 398 /* shift up all the later entries in this row */ 399 for (ii=N; ii>=i; ii--) { 400 rp[ii+1] = rp[ii]; 401 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 402 } 403 if (N >= i) { 404 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 405 } 406 rp[i] = col; 407 bap = ap + bs2*i; 408 if (roworiented) { 409 for (ii=0; ii<bs; ii++) { 410 for (jj=ii; jj<bs2; jj+=bs) { 411 bap[jj] = *value++; 412 } 413 } 414 } else { 415 for (ii=0; ii<bs; ii++) { 416 for (jj=0; jj<bs; jj++) { 417 *bap++ = *value++; 418 } 419 } 420 } 421 noinsert2:; 422 ailen[row] = nrow; 423 PetscFunctionReturn(0); 424 } 425 426 /* 427 This routine could be optimized by removing the need for the block copy below and passing stride information 428 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 429 */ 430 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 431 { 432 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 433 const MatScalar *value; 434 MatScalar *barray =baij->barray; 435 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 436 PetscErrorCode ierr; 437 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 438 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 439 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 440 441 PetscFunctionBegin; 442 if (!barray) { 443 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 444 baij->barray = barray; 445 } 446 447 if (roworiented) { 448 stepval = (n-1)*bs; 449 } else { 450 stepval = (m-1)*bs; 451 } 452 for (i=0; i<m; i++) { 453 if (im[i] < 0) continue; 454 #if defined(PETSC_USE_DEBUG) 455 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1); 456 #endif 457 if (im[i] >= rstart && im[i] < rend) { 458 row = im[i] - rstart; 459 for (j=0; j<n; j++) { 460 if (im[i] > in[j]) { 461 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 462 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 463 } 464 /* If NumCol = 1 then a copy is not required */ 465 if ((roworiented) && (n == 1)) { 466 barray = (MatScalar*) v + i*bs2; 467 } else if ((!roworiented) && (m == 1)) { 468 barray = (MatScalar*) v + j*bs2; 469 } else { /* Here a copy is required */ 470 if (roworiented) { 471 value = v + i*(stepval+bs)*bs + j*bs; 472 } else { 473 value = v + j*(stepval+bs)*bs + i*bs; 474 } 475 for (ii=0; ii<bs; ii++,value+=stepval) { 476 for (jj=0; jj<bs; jj++) { 477 *barray++ = *value++; 478 } 479 } 480 barray -=bs2; 481 } 482 483 if (in[j] >= cstart && in[j] < cend) { 484 col = in[j] - cstart; 485 ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 486 } else if (in[j] < 0) continue; 487 #if defined(PETSC_USE_DEBUG) 488 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1); 489 #endif 490 else { 491 if (mat->was_assembled) { 492 if (!baij->colmap) { 493 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 494 } 495 496 #if defined(PETSC_USE_DEBUG) 497 #if defined(PETSC_USE_CTABLE) 498 { PetscInt data; 499 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 500 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 501 } 502 #else 503 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 504 #endif 505 #endif 506 #if defined(PETSC_USE_CTABLE) 507 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 508 col = (col - 1)/bs; 509 #else 510 col = (baij->colmap[in[j]] - 1)/bs; 511 #endif 512 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 513 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 514 col = in[j]; 515 } 516 } else col = in[j]; 517 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 518 } 519 } 520 } else { 521 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 522 if (!baij->donotstash) { 523 if (roworiented) { 524 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 525 } else { 526 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 527 } 528 } 529 } 530 } 531 PetscFunctionReturn(0); 532 } 533 534 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 535 { 536 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 537 PetscErrorCode ierr; 538 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 539 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 540 541 PetscFunctionBegin; 542 for (i=0; i<m; i++) { 543 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 544 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 545 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 546 row = idxm[i] - bsrstart; 547 for (j=0; j<n; j++) { 548 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 549 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 550 if (idxn[j] >= bscstart && idxn[j] < bscend) { 551 col = idxn[j] - bscstart; 552 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 553 } else { 554 if (!baij->colmap) { 555 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 556 } 557 #if defined(PETSC_USE_CTABLE) 558 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 559 data--; 560 #else 561 data = baij->colmap[idxn[j]/bs]-1; 562 #endif 563 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 564 else { 565 col = data + idxn[j]%bs; 566 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 567 } 568 } 569 } 570 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 571 } 572 PetscFunctionReturn(0); 573 } 574 575 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 576 { 577 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 578 PetscErrorCode ierr; 579 PetscReal sum[2],*lnorm2; 580 581 PetscFunctionBegin; 582 if (baij->size == 1) { 583 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 584 } else { 585 if (type == NORM_FROBENIUS) { 586 ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr); 587 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 588 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 589 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 590 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 591 ierr = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 592 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 593 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 594 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 595 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 596 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 597 PetscReal *rsum,*rsum2,vabs; 598 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 599 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 600 MatScalar *v; 601 602 ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr); 603 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 604 /* Amat */ 605 v = amat->a; jj = amat->j; 606 for (brow=0; brow<mbs; brow++) { 607 grow = bs*(rstart + brow); 608 nz = amat->i[brow+1] - amat->i[brow]; 609 for (bcol=0; bcol<nz; bcol++) { 610 gcol = bs*(rstart + *jj); jj++; 611 for (col=0; col<bs; col++) { 612 for (row=0; row<bs; row++) { 613 vabs = PetscAbsScalar(*v); v++; 614 rsum[gcol+col] += vabs; 615 /* non-diagonal block */ 616 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 617 } 618 } 619 } 620 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 621 } 622 /* Bmat */ 623 v = bmat->a; jj = bmat->j; 624 for (brow=0; brow<mbs; brow++) { 625 grow = bs*(rstart + brow); 626 nz = bmat->i[brow+1] - bmat->i[brow]; 627 for (bcol=0; bcol<nz; bcol++) { 628 gcol = bs*garray[*jj]; jj++; 629 for (col=0; col<bs; col++) { 630 for (row=0; row<bs; row++) { 631 vabs = PetscAbsScalar(*v); v++; 632 rsum[gcol+col] += vabs; 633 rsum[grow+row] += vabs; 634 } 635 } 636 } 637 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 638 } 639 ierr = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 640 *norm = 0.0; 641 for (col=0; col<mat->cmap->N; col++) { 642 if (rsum2[col] > *norm) *norm = rsum2[col]; 643 } 644 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 645 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 646 } 647 PetscFunctionReturn(0); 648 } 649 650 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 651 { 652 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 653 PetscErrorCode ierr; 654 PetscInt nstash,reallocs; 655 656 PetscFunctionBegin; 657 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 658 659 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 660 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 661 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 662 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 663 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 664 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 665 PetscFunctionReturn(0); 666 } 667 668 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 669 { 670 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 671 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data; 672 PetscErrorCode ierr; 673 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 674 PetscInt *row,*col; 675 PetscBool other_disassembled; 676 PetscMPIInt n; 677 PetscBool r1,r2,r3; 678 MatScalar *val; 679 680 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 681 PetscFunctionBegin; 682 if (!baij->donotstash && !mat->nooffprocentries) { 683 while (1) { 684 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 685 if (!flg) break; 686 687 for (i=0; i<n;) { 688 /* Now identify the consecutive vals belonging to the same row */ 689 for (j=i,rstart=row[j]; j<n; j++) { 690 if (row[j] != rstart) break; 691 } 692 if (j < n) ncols = j-i; 693 else ncols = n-i; 694 /* Now assemble all these values with a single function call */ 695 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 696 i = j; 697 } 698 } 699 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 700 /* Now process the block-stash. Since the values are stashed column-oriented, 701 set the roworiented flag to column oriented, and after MatSetValues() 702 restore the original flags */ 703 r1 = baij->roworiented; 704 r2 = a->roworiented; 705 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 706 707 baij->roworiented = PETSC_FALSE; 708 a->roworiented = PETSC_FALSE; 709 710 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 711 while (1) { 712 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 713 if (!flg) break; 714 715 for (i=0; i<n;) { 716 /* Now identify the consecutive vals belonging to the same row */ 717 for (j=i,rstart=row[j]; j<n; j++) { 718 if (row[j] != rstart) break; 719 } 720 if (j < n) ncols = j-i; 721 else ncols = n-i; 722 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr); 723 i = j; 724 } 725 } 726 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 727 728 baij->roworiented = r1; 729 a->roworiented = r2; 730 731 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 732 } 733 734 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 735 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 736 737 /* determine if any processor has disassembled, if so we must 738 also disassemble ourselfs, in order that we may reassemble. */ 739 /* 740 if nonzero structure of submatrix B cannot change then we know that 741 no processor disassembled thus we can skip this stuff 742 */ 743 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 744 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 745 if (mat->was_assembled && !other_disassembled) { 746 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 747 } 748 } 749 750 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 751 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 752 } 753 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 754 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 755 756 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 757 758 baij->rowvalues = 0; 759 760 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 761 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 762 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 763 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 764 } 765 PetscFunctionReturn(0); 766 } 767 768 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 769 #include <petscdraw.h> 770 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 771 { 772 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 773 PetscErrorCode ierr; 774 PetscInt bs = mat->rmap->bs; 775 PetscMPIInt rank = baij->rank; 776 PetscBool iascii,isdraw; 777 PetscViewer sviewer; 778 PetscViewerFormat format; 779 780 PetscFunctionBegin; 781 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 782 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 783 if (iascii) { 784 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 785 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 786 MatInfo info; 787 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 788 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 789 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 790 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr); 791 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 792 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 793 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 794 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 795 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 796 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 797 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 798 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 799 PetscFunctionReturn(0); 800 } else if (format == PETSC_VIEWER_ASCII_INFO) { 801 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 802 PetscFunctionReturn(0); 803 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 804 PetscFunctionReturn(0); 805 } 806 } 807 808 if (isdraw) { 809 PetscDraw draw; 810 PetscBool isnull; 811 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 812 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 813 if (isnull) PetscFunctionReturn(0); 814 } 815 816 { 817 /* assemble the entire matrix onto first processor. */ 818 Mat A; 819 Mat_SeqSBAIJ *Aloc; 820 Mat_SeqBAIJ *Bloc; 821 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 822 MatScalar *a; 823 const char *matname; 824 825 /* Should this be the same type as mat? */ 826 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 827 if (!rank) { 828 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 829 } else { 830 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 831 } 832 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 833 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 834 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 835 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 836 837 /* copy over the A part */ 838 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 839 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 840 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 841 842 for (i=0; i<mbs; i++) { 843 rvals[0] = bs*(baij->rstartbs + i); 844 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 845 for (j=ai[i]; j<ai[i+1]; j++) { 846 col = (baij->cstartbs+aj[j])*bs; 847 for (k=0; k<bs; k++) { 848 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 849 col++; 850 a += bs; 851 } 852 } 853 } 854 /* copy over the B part */ 855 Bloc = (Mat_SeqBAIJ*)baij->B->data; 856 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 857 for (i=0; i<mbs; i++) { 858 859 rvals[0] = bs*(baij->rstartbs + i); 860 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 861 for (j=ai[i]; j<ai[i+1]; j++) { 862 col = baij->garray[aj[j]]*bs; 863 for (k=0; k<bs; k++) { 864 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 865 col++; 866 a += bs; 867 } 868 } 869 } 870 ierr = PetscFree(rvals);CHKERRQ(ierr); 871 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 872 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 873 /* 874 Everyone has to call to draw the matrix since the graphics waits are 875 synchronized across all processors that share the PetscDraw object 876 */ 877 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 878 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 879 if (!rank) { 880 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 881 ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 882 } 883 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 884 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 885 ierr = MatDestroy(&A);CHKERRQ(ierr); 886 } 887 PetscFunctionReturn(0); 888 } 889 890 static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer) 891 { 892 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)mat->data; 893 Mat_SeqSBAIJ *A = (Mat_SeqSBAIJ*)a->A->data; 894 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data; 895 PetscErrorCode ierr; 896 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 897 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 898 int fd; 899 PetscScalar *column_values; 900 FILE *file; 901 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 902 PetscInt message_count,flowcontrolcount; 903 904 PetscFunctionBegin; 905 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 906 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 907 nz = bs2*(A->nz + B->nz); 908 rlen = mat->rmap->n; 909 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 910 if (!rank) { 911 header[0] = MAT_FILE_CLASSID; 912 header[1] = mat->rmap->N; 913 header[2] = mat->cmap->N; 914 915 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 916 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 917 /* get largest number of rows any processor has */ 918 range = mat->rmap->range; 919 for (i=1; i<size; i++) { 920 rlen = PetscMax(rlen,range[i+1] - range[i]); 921 } 922 } else { 923 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 924 } 925 926 ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr); 927 /* compute lengths of each row */ 928 for (i=0; i<a->mbs; i++) { 929 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 930 } 931 /* store the row lengths to the file */ 932 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 933 if (!rank) { 934 MPI_Status status; 935 ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr); 936 rlen = (range[1] - range[0])/bs; 937 for (i=0; i<rlen; i++) { 938 for (j=0; j<bs; j++) { 939 row_lens[i*bs+j] = bs*crow_lens[i]; 940 } 941 } 942 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 943 for (i=1; i<size; i++) { 944 rlen = (range[i+1] - range[i])/bs; 945 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 946 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 947 for (k=0; k<rlen; k++) { 948 for (j=0; j<bs; j++) { 949 row_lens[k*bs+j] = bs*crow_lens[k]; 950 } 951 } 952 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 953 } 954 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 955 ierr = PetscFree(row_lens);CHKERRQ(ierr); 956 } else { 957 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 958 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 959 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 960 } 961 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 962 963 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 964 information needed to make it for each row from a block row. This does require more communication but still not more than 965 the communication needed for the nonzero values */ 966 nzmax = nz; /* space a largest processor needs */ 967 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 968 ierr = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr); 969 cnt = 0; 970 for (i=0; i<a->mbs; i++) { 971 pcnt = cnt; 972 for (j=B->i[i]; j<B->i[i+1]; j++) { 973 if ((col = garray[B->j[j]]) > cstart) break; 974 for (l=0; l<bs; l++) { 975 column_indices[cnt++] = bs*col+l; 976 } 977 } 978 for (k=A->i[i]; k<A->i[i+1]; k++) { 979 for (l=0; l<bs; l++) { 980 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 981 } 982 } 983 for (; j<B->i[i+1]; j++) { 984 for (l=0; l<bs; l++) { 985 column_indices[cnt++] = bs*garray[B->j[j]]+l; 986 } 987 } 988 len = cnt - pcnt; 989 for (k=1; k<bs; k++) { 990 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 991 cnt += len; 992 } 993 } 994 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 995 996 /* store the columns to the file */ 997 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 998 if (!rank) { 999 MPI_Status status; 1000 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1001 for (i=1; i<size; i++) { 1002 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1003 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1004 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1005 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1006 } 1007 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1008 } else { 1009 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1010 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1011 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1012 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1013 } 1014 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1015 1016 /* load up the numerical values */ 1017 ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr); 1018 cnt = 0; 1019 for (i=0; i<a->mbs; i++) { 1020 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1021 for (j=B->i[i]; j<B->i[i+1]; j++) { 1022 if (garray[B->j[j]] > cstart) break; 1023 for (l=0; l<bs; l++) { 1024 for (ll=0; ll<bs; ll++) { 1025 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1026 } 1027 } 1028 cnt += bs; 1029 } 1030 for (k=A->i[i]; k<A->i[i+1]; k++) { 1031 for (l=0; l<bs; l++) { 1032 for (ll=0; ll<bs; ll++) { 1033 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1034 } 1035 } 1036 cnt += bs; 1037 } 1038 for (; j<B->i[i+1]; j++) { 1039 for (l=0; l<bs; l++) { 1040 for (ll=0; ll<bs; ll++) { 1041 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1042 } 1043 } 1044 cnt += bs; 1045 } 1046 cnt += (bs-1)*rlen; 1047 } 1048 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1049 1050 /* store the column values to the file */ 1051 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1052 if (!rank) { 1053 MPI_Status status; 1054 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1055 for (i=1; i<size; i++) { 1056 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1057 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1058 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1059 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1060 } 1061 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1062 } else { 1063 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1064 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1065 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1066 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1067 } 1068 ierr = PetscFree(column_values);CHKERRQ(ierr); 1069 1070 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1071 if (file) { 1072 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1073 } 1074 PetscFunctionReturn(0); 1075 } 1076 1077 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 1078 { 1079 PetscErrorCode ierr; 1080 PetscBool iascii,isdraw,issocket,isbinary; 1081 1082 PetscFunctionBegin; 1083 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1084 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1085 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1086 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1087 if (iascii || isdraw || issocket) { 1088 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1089 } else if (isbinary) { 1090 ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1091 } 1092 PetscFunctionReturn(0); 1093 } 1094 1095 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 1096 { 1097 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1098 PetscErrorCode ierr; 1099 1100 PetscFunctionBegin; 1101 #if defined(PETSC_USE_LOG) 1102 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1103 #endif 1104 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1105 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1106 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1107 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1108 #if defined(PETSC_USE_CTABLE) 1109 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1110 #else 1111 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1112 #endif 1113 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1114 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1115 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1116 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 1117 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 1118 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 1119 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 1120 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 1121 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 1122 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1123 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1124 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 1125 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 1126 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 1127 ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 1128 #if defined(PETSC_USE_REAL_MAT_SINGLE) 1129 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 1130 #endif 1131 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 1132 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 1133 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1134 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1135 1136 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1137 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1138 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1139 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1140 #if defined(PETSC_HAVE_ELEMENTAL) 1141 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr); 1142 #endif 1143 PetscFunctionReturn(0); 1144 } 1145 1146 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 1147 { 1148 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1149 PetscErrorCode ierr; 1150 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 1151 PetscScalar *from; 1152 const PetscScalar *x; 1153 1154 PetscFunctionBegin; 1155 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1156 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1157 1158 /* diagonal part */ 1159 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1160 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1161 1162 /* subdiagonal part */ 1163 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1164 1165 /* copy x into the vec slvec0 */ 1166 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1167 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1168 1169 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1170 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1171 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1172 1173 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1174 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1175 /* supperdiagonal part */ 1176 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1177 PetscFunctionReturn(0); 1178 } 1179 1180 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 1181 { 1182 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1183 PetscErrorCode ierr; 1184 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 1185 PetscScalar *from; 1186 const PetscScalar *x; 1187 1188 PetscFunctionBegin; 1189 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1190 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1191 1192 /* diagonal part */ 1193 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1194 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1195 1196 /* subdiagonal part */ 1197 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1198 1199 /* copy x into the vec slvec0 */ 1200 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1201 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1202 1203 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1204 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1205 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1206 1207 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1208 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1209 /* supperdiagonal part */ 1210 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1211 PetscFunctionReturn(0); 1212 } 1213 1214 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 1215 { 1216 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1217 PetscErrorCode ierr; 1218 PetscInt nt; 1219 1220 PetscFunctionBegin; 1221 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1222 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1223 1224 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1225 if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1226 1227 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1228 /* do diagonal part */ 1229 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1230 /* do supperdiagonal part */ 1231 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1232 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1233 /* do subdiagonal part */ 1234 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1235 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1236 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1237 PetscFunctionReturn(0); 1238 } 1239 1240 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1241 { 1242 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1243 PetscErrorCode ierr; 1244 PetscInt mbs=a->mbs,bs=A->rmap->bs; 1245 PetscScalar *from,zero=0.0; 1246 const PetscScalar *x; 1247 1248 PetscFunctionBegin; 1249 /* 1250 PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n"); 1251 PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT); 1252 */ 1253 /* diagonal part */ 1254 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1255 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 1256 1257 /* subdiagonal part */ 1258 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1259 1260 /* copy x into the vec slvec0 */ 1261 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1262 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1263 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1264 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1265 1266 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1267 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1268 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1269 1270 /* supperdiagonal part */ 1271 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1272 PetscFunctionReturn(0); 1273 } 1274 1275 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1276 { 1277 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1278 PetscErrorCode ierr; 1279 1280 PetscFunctionBegin; 1281 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1282 /* do diagonal part */ 1283 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1284 /* do supperdiagonal part */ 1285 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1286 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1287 1288 /* do subdiagonal part */ 1289 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1290 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1291 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1292 PetscFunctionReturn(0); 1293 } 1294 1295 /* 1296 This only works correctly for square matrices where the subblock A->A is the 1297 diagonal block 1298 */ 1299 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1300 { 1301 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1302 PetscErrorCode ierr; 1303 1304 PetscFunctionBegin; 1305 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1306 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1307 PetscFunctionReturn(0); 1308 } 1309 1310 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1311 { 1312 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1313 PetscErrorCode ierr; 1314 1315 PetscFunctionBegin; 1316 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1317 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1318 PetscFunctionReturn(0); 1319 } 1320 1321 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1322 { 1323 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1324 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1325 PetscErrorCode ierr; 1326 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1327 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1328 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1329 1330 PetscFunctionBegin; 1331 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1332 mat->getrowactive = PETSC_TRUE; 1333 1334 if (!mat->rowvalues && (idx || v)) { 1335 /* 1336 allocate enough space to hold information from the longest row. 1337 */ 1338 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1339 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1340 PetscInt max = 1,mbs = mat->mbs,tmp; 1341 for (i=0; i<mbs; i++) { 1342 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1343 if (max < tmp) max = tmp; 1344 } 1345 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1346 } 1347 1348 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1349 lrow = row - brstart; /* local row index */ 1350 1351 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1352 if (!v) {pvA = 0; pvB = 0;} 1353 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1354 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1355 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1356 nztot = nzA + nzB; 1357 1358 cmap = mat->garray; 1359 if (v || idx) { 1360 if (nztot) { 1361 /* Sort by increasing column numbers, assuming A and B already sorted */ 1362 PetscInt imark = -1; 1363 if (v) { 1364 *v = v_p = mat->rowvalues; 1365 for (i=0; i<nzB; i++) { 1366 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1367 else break; 1368 } 1369 imark = i; 1370 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1371 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1372 } 1373 if (idx) { 1374 *idx = idx_p = mat->rowindices; 1375 if (imark > -1) { 1376 for (i=0; i<imark; i++) { 1377 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1378 } 1379 } else { 1380 for (i=0; i<nzB; i++) { 1381 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1382 else break; 1383 } 1384 imark = i; 1385 } 1386 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1387 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1388 } 1389 } else { 1390 if (idx) *idx = 0; 1391 if (v) *v = 0; 1392 } 1393 } 1394 *nz = nztot; 1395 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1396 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1397 PetscFunctionReturn(0); 1398 } 1399 1400 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1401 { 1402 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1403 1404 PetscFunctionBegin; 1405 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1406 baij->getrowactive = PETSC_FALSE; 1407 PetscFunctionReturn(0); 1408 } 1409 1410 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1411 { 1412 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1413 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1414 1415 PetscFunctionBegin; 1416 aA->getrow_utriangular = PETSC_TRUE; 1417 PetscFunctionReturn(0); 1418 } 1419 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1420 { 1421 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1422 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1423 1424 PetscFunctionBegin; 1425 aA->getrow_utriangular = PETSC_FALSE; 1426 PetscFunctionReturn(0); 1427 } 1428 1429 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1430 { 1431 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1432 PetscErrorCode ierr; 1433 1434 PetscFunctionBegin; 1435 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1436 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1437 PetscFunctionReturn(0); 1438 } 1439 1440 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1441 { 1442 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1443 PetscErrorCode ierr; 1444 1445 PetscFunctionBegin; 1446 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1447 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1448 PetscFunctionReturn(0); 1449 } 1450 1451 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ() 1452 Input: isrow - distributed(parallel), 1453 iscol_local - locally owned (seq) 1454 */ 1455 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg) 1456 { 1457 PetscErrorCode ierr; 1458 PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch; 1459 const PetscInt *ptr1,*ptr2; 1460 1461 PetscFunctionBegin; 1462 ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr); 1463 ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr); 1464 if (sz1 > sz2) { 1465 *flg = PETSC_FALSE; 1466 PetscFunctionReturn(0); 1467 } 1468 1469 ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr); 1470 ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1471 1472 ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr); 1473 ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr); 1474 ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr); 1475 ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr); 1476 ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr); 1477 ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr); 1478 1479 nmatch=0; 1480 k = 0; 1481 for (i=0; i<sz1; i++){ 1482 for (j=k; j<sz2; j++){ 1483 if (a1[i] == a2[j]) { 1484 k = j; nmatch++; 1485 break; 1486 } 1487 } 1488 } 1489 ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr); 1490 ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1491 ierr = PetscFree(a1);CHKERRQ(ierr); 1492 ierr = PetscFree(a2);CHKERRQ(ierr); 1493 if (nmatch < sz1) { 1494 *flg = PETSC_FALSE; 1495 } else { 1496 *flg = PETSC_TRUE; 1497 } 1498 PetscFunctionReturn(0); 1499 } 1500 1501 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1502 { 1503 PetscErrorCode ierr; 1504 IS iscol_local; 1505 PetscInt csize; 1506 PetscBool isequal; 1507 1508 PetscFunctionBegin; 1509 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1510 if (call == MAT_REUSE_MATRIX) { 1511 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1512 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1513 } else { 1514 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1515 ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr); 1516 if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow"); 1517 } 1518 1519 /* now call MatCreateSubMatrix_MPIBAIJ() */ 1520 ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1521 if (call == MAT_INITIAL_MATRIX) { 1522 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1523 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1524 } 1525 PetscFunctionReturn(0); 1526 } 1527 1528 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1529 { 1530 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1531 PetscErrorCode ierr; 1532 1533 PetscFunctionBegin; 1534 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1535 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1536 PetscFunctionReturn(0); 1537 } 1538 1539 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1540 { 1541 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1542 Mat A = a->A,B = a->B; 1543 PetscErrorCode ierr; 1544 PetscReal isend[5],irecv[5]; 1545 1546 PetscFunctionBegin; 1547 info->block_size = (PetscReal)matin->rmap->bs; 1548 1549 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1550 1551 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1552 isend[3] = info->memory; isend[4] = info->mallocs; 1553 1554 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1555 1556 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1557 isend[3] += info->memory; isend[4] += info->mallocs; 1558 if (flag == MAT_LOCAL) { 1559 info->nz_used = isend[0]; 1560 info->nz_allocated = isend[1]; 1561 info->nz_unneeded = isend[2]; 1562 info->memory = isend[3]; 1563 info->mallocs = isend[4]; 1564 } else if (flag == MAT_GLOBAL_MAX) { 1565 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1566 1567 info->nz_used = irecv[0]; 1568 info->nz_allocated = irecv[1]; 1569 info->nz_unneeded = irecv[2]; 1570 info->memory = irecv[3]; 1571 info->mallocs = irecv[4]; 1572 } else if (flag == MAT_GLOBAL_SUM) { 1573 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1574 1575 info->nz_used = irecv[0]; 1576 info->nz_allocated = irecv[1]; 1577 info->nz_unneeded = irecv[2]; 1578 info->memory = irecv[3]; 1579 info->mallocs = irecv[4]; 1580 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1581 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1582 info->fill_ratio_needed = 0; 1583 info->factor_mallocs = 0; 1584 PetscFunctionReturn(0); 1585 } 1586 1587 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1588 { 1589 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1590 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1591 PetscErrorCode ierr; 1592 1593 PetscFunctionBegin; 1594 switch (op) { 1595 case MAT_NEW_NONZERO_LOCATIONS: 1596 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1597 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1598 case MAT_KEEP_NONZERO_PATTERN: 1599 case MAT_SUBMAT_SINGLEIS: 1600 case MAT_NEW_NONZERO_LOCATION_ERR: 1601 MatCheckPreallocated(A,1); 1602 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1603 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1604 break; 1605 case MAT_ROW_ORIENTED: 1606 MatCheckPreallocated(A,1); 1607 a->roworiented = flg; 1608 1609 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1610 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1611 break; 1612 case MAT_NEW_DIAGONALS: 1613 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1614 break; 1615 case MAT_IGNORE_OFF_PROC_ENTRIES: 1616 a->donotstash = flg; 1617 break; 1618 case MAT_USE_HASH_TABLE: 1619 a->ht_flag = flg; 1620 break; 1621 case MAT_HERMITIAN: 1622 MatCheckPreallocated(A,1); 1623 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1624 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1625 #if defined(PETSC_USE_COMPLEX) 1626 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1627 #endif 1628 break; 1629 case MAT_SPD: 1630 A->spd_set = PETSC_TRUE; 1631 A->spd = flg; 1632 if (flg) { 1633 A->symmetric = PETSC_TRUE; 1634 A->structurally_symmetric = PETSC_TRUE; 1635 A->symmetric_set = PETSC_TRUE; 1636 A->structurally_symmetric_set = PETSC_TRUE; 1637 } 1638 break; 1639 case MAT_SYMMETRIC: 1640 MatCheckPreallocated(A,1); 1641 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1642 break; 1643 case MAT_STRUCTURALLY_SYMMETRIC: 1644 MatCheckPreallocated(A,1); 1645 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1646 break; 1647 case MAT_SYMMETRY_ETERNAL: 1648 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1649 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1650 break; 1651 case MAT_IGNORE_LOWER_TRIANGULAR: 1652 aA->ignore_ltriangular = flg; 1653 break; 1654 case MAT_ERROR_LOWER_TRIANGULAR: 1655 aA->ignore_ltriangular = flg; 1656 break; 1657 case MAT_GETROW_UPPERTRIANGULAR: 1658 aA->getrow_utriangular = flg; 1659 break; 1660 default: 1661 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1662 } 1663 PetscFunctionReturn(0); 1664 } 1665 1666 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1667 { 1668 PetscErrorCode ierr; 1669 1670 PetscFunctionBegin; 1671 if (reuse == MAT_INITIAL_MATRIX) { 1672 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1673 } else if (reuse == MAT_REUSE_MATRIX) { 1674 ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1675 } 1676 PetscFunctionReturn(0); 1677 } 1678 1679 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1680 { 1681 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1682 Mat a = baij->A, b=baij->B; 1683 PetscErrorCode ierr; 1684 PetscInt nv,m,n; 1685 PetscBool flg; 1686 1687 PetscFunctionBegin; 1688 if (ll != rr) { 1689 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1690 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1691 } 1692 if (!ll) PetscFunctionReturn(0); 1693 1694 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1695 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1696 1697 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1698 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1699 1700 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1701 1702 /* left diagonalscale the off-diagonal part */ 1703 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1704 1705 /* scale the diagonal part */ 1706 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1707 1708 /* right diagonalscale the off-diagonal part */ 1709 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1710 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1711 PetscFunctionReturn(0); 1712 } 1713 1714 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1715 { 1716 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1717 PetscErrorCode ierr; 1718 1719 PetscFunctionBegin; 1720 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1721 PetscFunctionReturn(0); 1722 } 1723 1724 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1725 1726 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1727 { 1728 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1729 Mat a,b,c,d; 1730 PetscBool flg; 1731 PetscErrorCode ierr; 1732 1733 PetscFunctionBegin; 1734 a = matA->A; b = matA->B; 1735 c = matB->A; d = matB->B; 1736 1737 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1738 if (flg) { 1739 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1740 } 1741 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1742 PetscFunctionReturn(0); 1743 } 1744 1745 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1746 { 1747 PetscErrorCode ierr; 1748 PetscBool isbaij; 1749 1750 PetscFunctionBegin; 1751 ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr); 1752 if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name); 1753 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1754 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1755 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1756 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1757 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1758 } else { 1759 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1760 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1761 1762 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1763 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1764 } 1765 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 1766 PetscFunctionReturn(0); 1767 } 1768 1769 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1770 { 1771 PetscErrorCode ierr; 1772 1773 PetscFunctionBegin; 1774 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1775 PetscFunctionReturn(0); 1776 } 1777 1778 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1779 { 1780 PetscErrorCode ierr; 1781 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1782 PetscBLASInt bnz,one=1; 1783 Mat_SeqSBAIJ *xa,*ya; 1784 Mat_SeqBAIJ *xb,*yb; 1785 1786 PetscFunctionBegin; 1787 if (str == SAME_NONZERO_PATTERN) { 1788 PetscScalar alpha = a; 1789 xa = (Mat_SeqSBAIJ*)xx->A->data; 1790 ya = (Mat_SeqSBAIJ*)yy->A->data; 1791 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1792 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1793 xb = (Mat_SeqBAIJ*)xx->B->data; 1794 yb = (Mat_SeqBAIJ*)yy->B->data; 1795 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1796 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1797 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1798 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1799 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1800 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1801 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 1802 } else { 1803 Mat B; 1804 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1805 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1806 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1807 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 1808 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1809 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1810 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1811 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1812 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1813 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1814 ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 1815 ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1816 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1817 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1818 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1819 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 1820 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1821 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1822 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1823 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1824 } 1825 PetscFunctionReturn(0); 1826 } 1827 1828 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1829 { 1830 PetscErrorCode ierr; 1831 PetscInt i; 1832 PetscBool flg; 1833 1834 PetscFunctionBegin; 1835 ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */ 1836 for (i=0; i<n; i++) { 1837 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1838 if (!flg) { 1839 ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr); 1840 } 1841 } 1842 PetscFunctionReturn(0); 1843 } 1844 1845 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a) 1846 { 1847 PetscErrorCode ierr; 1848 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data; 1849 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data; 1850 1851 PetscFunctionBegin; 1852 if (!Y->preallocated) { 1853 ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 1854 } else if (!aij->nz) { 1855 PetscInt nonew = aij->nonew; 1856 ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 1857 aij->nonew = nonew; 1858 } 1859 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1860 PetscFunctionReturn(0); 1861 } 1862 1863 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1864 { 1865 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1866 PetscErrorCode ierr; 1867 1868 PetscFunctionBegin; 1869 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 1870 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 1871 if (d) { 1872 PetscInt rstart; 1873 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 1874 *d += rstart/A->rmap->bs; 1875 1876 } 1877 PetscFunctionReturn(0); 1878 } 1879 1880 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1881 { 1882 PetscFunctionBegin; 1883 *a = ((Mat_MPISBAIJ*)A->data)->A; 1884 PetscFunctionReturn(0); 1885 } 1886 1887 /* -------------------------------------------------------------------*/ 1888 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1889 MatGetRow_MPISBAIJ, 1890 MatRestoreRow_MPISBAIJ, 1891 MatMult_MPISBAIJ, 1892 /* 4*/ MatMultAdd_MPISBAIJ, 1893 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1894 MatMultAdd_MPISBAIJ, 1895 0, 1896 0, 1897 0, 1898 /* 10*/ 0, 1899 0, 1900 0, 1901 MatSOR_MPISBAIJ, 1902 MatTranspose_MPISBAIJ, 1903 /* 15*/ MatGetInfo_MPISBAIJ, 1904 MatEqual_MPISBAIJ, 1905 MatGetDiagonal_MPISBAIJ, 1906 MatDiagonalScale_MPISBAIJ, 1907 MatNorm_MPISBAIJ, 1908 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1909 MatAssemblyEnd_MPISBAIJ, 1910 MatSetOption_MPISBAIJ, 1911 MatZeroEntries_MPISBAIJ, 1912 /* 24*/ 0, 1913 0, 1914 0, 1915 0, 1916 0, 1917 /* 29*/ MatSetUp_MPISBAIJ, 1918 0, 1919 0, 1920 MatGetDiagonalBlock_MPISBAIJ, 1921 0, 1922 /* 34*/ MatDuplicate_MPISBAIJ, 1923 0, 1924 0, 1925 0, 1926 0, 1927 /* 39*/ MatAXPY_MPISBAIJ, 1928 MatCreateSubMatrices_MPISBAIJ, 1929 MatIncreaseOverlap_MPISBAIJ, 1930 MatGetValues_MPISBAIJ, 1931 MatCopy_MPISBAIJ, 1932 /* 44*/ 0, 1933 MatScale_MPISBAIJ, 1934 MatShift_MPISBAIJ, 1935 0, 1936 0, 1937 /* 49*/ 0, 1938 0, 1939 0, 1940 0, 1941 0, 1942 /* 54*/ 0, 1943 0, 1944 MatSetUnfactored_MPISBAIJ, 1945 0, 1946 MatSetValuesBlocked_MPISBAIJ, 1947 /* 59*/ MatCreateSubMatrix_MPISBAIJ, 1948 0, 1949 0, 1950 0, 1951 0, 1952 /* 64*/ 0, 1953 0, 1954 0, 1955 0, 1956 0, 1957 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1958 0, 1959 0, 1960 0, 1961 0, 1962 /* 74*/ 0, 1963 0, 1964 0, 1965 0, 1966 0, 1967 /* 79*/ 0, 1968 0, 1969 0, 1970 0, 1971 MatLoad_MPISBAIJ, 1972 /* 84*/ 0, 1973 0, 1974 0, 1975 0, 1976 0, 1977 /* 89*/ 0, 1978 0, 1979 0, 1980 0, 1981 0, 1982 /* 94*/ 0, 1983 0, 1984 0, 1985 0, 1986 0, 1987 /* 99*/ 0, 1988 0, 1989 0, 1990 0, 1991 0, 1992 /*104*/ 0, 1993 MatRealPart_MPISBAIJ, 1994 MatImaginaryPart_MPISBAIJ, 1995 MatGetRowUpperTriangular_MPISBAIJ, 1996 MatRestoreRowUpperTriangular_MPISBAIJ, 1997 /*109*/ 0, 1998 0, 1999 0, 2000 0, 2001 MatMissingDiagonal_MPISBAIJ, 2002 /*114*/ 0, 2003 0, 2004 0, 2005 0, 2006 0, 2007 /*119*/ 0, 2008 0, 2009 0, 2010 0, 2011 0, 2012 /*124*/ 0, 2013 0, 2014 0, 2015 0, 2016 0, 2017 /*129*/ 0, 2018 0, 2019 0, 2020 0, 2021 0, 2022 /*134*/ 0, 2023 0, 2024 0, 2025 0, 2026 0, 2027 /*139*/ MatSetBlockSizes_Default, 2028 0, 2029 0, 2030 0, 2031 0, 2032 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 2033 }; 2034 2035 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2036 { 2037 Mat_MPISBAIJ *b; 2038 PetscErrorCode ierr; 2039 PetscInt i,mbs,Mbs; 2040 2041 PetscFunctionBegin; 2042 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2043 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2044 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2045 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2046 2047 b = (Mat_MPISBAIJ*)B->data; 2048 mbs = B->rmap->n/bs; 2049 Mbs = B->rmap->N/bs; 2050 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 2051 2052 B->rmap->bs = bs; 2053 b->bs2 = bs*bs; 2054 b->mbs = mbs; 2055 b->Mbs = Mbs; 2056 b->nbs = B->cmap->n/bs; 2057 b->Nbs = B->cmap->N/bs; 2058 2059 for (i=0; i<=b->size; i++) { 2060 b->rangebs[i] = B->rmap->range[i]/bs; 2061 } 2062 b->rstartbs = B->rmap->rstart/bs; 2063 b->rendbs = B->rmap->rend/bs; 2064 2065 b->cstartbs = B->cmap->rstart/bs; 2066 b->cendbs = B->cmap->rend/bs; 2067 2068 #if defined(PETSC_USE_CTABLE) 2069 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2070 #else 2071 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2072 #endif 2073 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2074 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2075 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2076 ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr); 2077 ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr); 2078 ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr); 2079 ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr); 2080 ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr); 2081 ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr); 2082 2083 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2084 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2085 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2086 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2087 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2088 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2089 2090 if (!B->preallocated) { 2091 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2092 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2093 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 2094 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2095 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2096 } 2097 2098 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2099 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2100 2101 B->preallocated = PETSC_TRUE; 2102 B->was_assembled = PETSC_FALSE; 2103 B->assembled = PETSC_FALSE; 2104 PetscFunctionReturn(0); 2105 } 2106 2107 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2108 { 2109 PetscInt m,rstart,cstart,cend; 2110 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2111 const PetscInt *JJ =0; 2112 PetscScalar *values=0; 2113 PetscErrorCode ierr; 2114 2115 PetscFunctionBegin; 2116 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2117 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2118 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2119 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2120 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2121 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2122 m = B->rmap->n/bs; 2123 rstart = B->rmap->rstart/bs; 2124 cstart = B->cmap->rstart/bs; 2125 cend = B->cmap->rend/bs; 2126 2127 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2128 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2129 for (i=0; i<m; i++) { 2130 nz = ii[i+1] - ii[i]; 2131 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2132 nz_max = PetscMax(nz_max,nz); 2133 JJ = jj + ii[i]; 2134 for (j=0; j<nz; j++) { 2135 if (*JJ >= cstart) break; 2136 JJ++; 2137 } 2138 d = 0; 2139 for (; j<nz; j++) { 2140 if (*JJ++ >= cend) break; 2141 d++; 2142 } 2143 d_nnz[i] = d; 2144 o_nnz[i] = nz - d; 2145 } 2146 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2147 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2148 2149 values = (PetscScalar*)V; 2150 if (!values) { 2151 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2152 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2153 } 2154 for (i=0; i<m; i++) { 2155 PetscInt row = i + rstart; 2156 PetscInt ncols = ii[i+1] - ii[i]; 2157 const PetscInt *icols = jj + ii[i]; 2158 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2159 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2160 } 2161 2162 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2163 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2164 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2165 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2166 PetscFunctionReturn(0); 2167 } 2168 2169 /*MC 2170 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2171 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2172 the matrix is stored. 2173 2174 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2175 can call MatSetOption(Mat, MAT_HERMITIAN); 2176 2177 Options Database Keys: 2178 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 2179 2180 Level: beginner 2181 2182 .seealso: MatCreateMPISBAIJ 2183 M*/ 2184 2185 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2186 { 2187 Mat_MPISBAIJ *b; 2188 PetscErrorCode ierr; 2189 PetscBool flg = PETSC_FALSE; 2190 2191 PetscFunctionBegin; 2192 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2193 B->data = (void*)b; 2194 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2195 2196 B->ops->destroy = MatDestroy_MPISBAIJ; 2197 B->ops->view = MatView_MPISBAIJ; 2198 B->assembled = PETSC_FALSE; 2199 B->insertmode = NOT_SET_VALUES; 2200 2201 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2202 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2203 2204 /* build local table of row and column ownerships */ 2205 ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 2206 2207 /* build cache for off array entries formed */ 2208 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2209 2210 b->donotstash = PETSC_FALSE; 2211 b->colmap = NULL; 2212 b->garray = NULL; 2213 b->roworiented = PETSC_TRUE; 2214 2215 /* stuff used in block assembly */ 2216 b->barray = 0; 2217 2218 /* stuff used for matrix vector multiply */ 2219 b->lvec = 0; 2220 b->Mvctx = 0; 2221 b->slvec0 = 0; 2222 b->slvec0b = 0; 2223 b->slvec1 = 0; 2224 b->slvec1a = 0; 2225 b->slvec1b = 0; 2226 b->sMvctx = 0; 2227 2228 /* stuff for MatGetRow() */ 2229 b->rowindices = 0; 2230 b->rowvalues = 0; 2231 b->getrowactive = PETSC_FALSE; 2232 2233 /* hash table stuff */ 2234 b->ht = 0; 2235 b->hd = 0; 2236 b->ht_size = 0; 2237 b->ht_flag = PETSC_FALSE; 2238 b->ht_fact = 0; 2239 b->ht_total_ct = 0; 2240 b->ht_insert_ct = 0; 2241 2242 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2243 b->ijonly = PETSC_FALSE; 2244 2245 b->in_loc = 0; 2246 b->v_loc = 0; 2247 b->n_loc = 0; 2248 2249 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 2250 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 2251 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 2252 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 2253 #if defined(PETSC_HAVE_ELEMENTAL) 2254 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 2255 #endif 2256 2257 B->symmetric = PETSC_TRUE; 2258 B->structurally_symmetric = PETSC_TRUE; 2259 B->symmetric_set = PETSC_TRUE; 2260 B->structurally_symmetric_set = PETSC_TRUE; 2261 B->symmetric_eternal = PETSC_TRUE; 2262 2263 B->hermitian = PETSC_FALSE; 2264 B->hermitian_set = PETSC_FALSE; 2265 2266 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 2267 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 2268 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 2269 if (flg) { 2270 PetscReal fact = 1.39; 2271 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2272 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 2273 if (fact <= 1.0) fact = 1.39; 2274 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2275 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2276 } 2277 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2278 PetscFunctionReturn(0); 2279 } 2280 2281 /*MC 2282 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2283 2284 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 2285 and MATMPISBAIJ otherwise. 2286 2287 Options Database Keys: 2288 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2289 2290 Level: beginner 2291 2292 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 2293 M*/ 2294 2295 /*@C 2296 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2297 the user should preallocate the matrix storage by setting the parameters 2298 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2299 performance can be increased by more than a factor of 50. 2300 2301 Collective on Mat 2302 2303 Input Parameters: 2304 + B - the matrix 2305 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2306 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2307 . d_nz - number of block nonzeros per block row in diagonal portion of local 2308 submatrix (same for all local rows) 2309 . d_nnz - array containing the number of block nonzeros in the various block rows 2310 in the upper triangular and diagonal part of the in diagonal portion of the local 2311 (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 2312 for the diagonal entry and set a value even if it is zero. 2313 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2314 submatrix (same for all local rows). 2315 - o_nnz - array containing the number of nonzeros in the various block rows of the 2316 off-diagonal portion of the local submatrix that is right of the diagonal 2317 (possibly different for each block row) or NULL. 2318 2319 2320 Options Database Keys: 2321 . -mat_no_unroll - uses code that does not unroll the loops in the 2322 block calculations (much slower) 2323 . -mat_block_size - size of the blocks to use 2324 2325 Notes: 2326 2327 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2328 than it must be used on all processors that share the object for that argument. 2329 2330 If the *_nnz parameter is given then the *_nz parameter is ignored 2331 2332 Storage Information: 2333 For a square global matrix we define each processor's diagonal portion 2334 to be its local rows and the corresponding columns (a square submatrix); 2335 each processor's off-diagonal portion encompasses the remainder of the 2336 local matrix (a rectangular submatrix). 2337 2338 The user can specify preallocated storage for the diagonal part of 2339 the local submatrix with either d_nz or d_nnz (not both). Set 2340 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2341 memory allocation. Likewise, specify preallocated storage for the 2342 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2343 2344 You can call MatGetInfo() to get information on how effective the preallocation was; 2345 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2346 You can also run with the option -info and look for messages with the string 2347 malloc in them to see if additional memory allocation was needed. 2348 2349 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2350 the figure below we depict these three local rows and all columns (0-11). 2351 2352 .vb 2353 0 1 2 3 4 5 6 7 8 9 10 11 2354 -------------------------- 2355 row 3 |. . . d d d o o o o o o 2356 row 4 |. . . d d d o o o o o o 2357 row 5 |. . . d d d o o o o o o 2358 -------------------------- 2359 .ve 2360 2361 Thus, any entries in the d locations are stored in the d (diagonal) 2362 submatrix, and any entries in the o locations are stored in the 2363 o (off-diagonal) submatrix. Note that the d matrix is stored in 2364 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2365 2366 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2367 plus the diagonal part of the d matrix, 2368 and o_nz should indicate the number of block nonzeros per row in the o matrix 2369 2370 In general, for PDE problems in which most nonzeros are near the diagonal, 2371 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2372 or you will get TERRIBLE performance; see the users' manual chapter on 2373 matrices. 2374 2375 Level: intermediate 2376 2377 .keywords: matrix, block, aij, compressed row, sparse, parallel 2378 2379 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 2380 @*/ 2381 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2382 { 2383 PetscErrorCode ierr; 2384 2385 PetscFunctionBegin; 2386 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2387 PetscValidType(B,1); 2388 PetscValidLogicalCollectiveInt(B,bs,2); 2389 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 2390 PetscFunctionReturn(0); 2391 } 2392 2393 /*@C 2394 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2395 (block compressed row). For good matrix assembly performance 2396 the user should preallocate the matrix storage by setting the parameters 2397 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2398 performance can be increased by more than a factor of 50. 2399 2400 Collective on MPI_Comm 2401 2402 Input Parameters: 2403 + comm - MPI communicator 2404 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2405 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2406 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2407 This value should be the same as the local size used in creating the 2408 y vector for the matrix-vector product y = Ax. 2409 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2410 This value should be the same as the local size used in creating the 2411 x vector for the matrix-vector product y = Ax. 2412 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2413 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2414 . d_nz - number of block nonzeros per block row in diagonal portion of local 2415 submatrix (same for all local rows) 2416 . d_nnz - array containing the number of block nonzeros in the various block rows 2417 in the upper triangular portion of the in diagonal portion of the local 2418 (possibly different for each block block row) or NULL. 2419 If you plan to factor the matrix you must leave room for the diagonal entry and 2420 set its value even if it is zero. 2421 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2422 submatrix (same for all local rows). 2423 - o_nnz - array containing the number of nonzeros in the various block rows of the 2424 off-diagonal portion of the local submatrix (possibly different for 2425 each block row) or NULL. 2426 2427 Output Parameter: 2428 . A - the matrix 2429 2430 Options Database Keys: 2431 . -mat_no_unroll - uses code that does not unroll the loops in the 2432 block calculations (much slower) 2433 . -mat_block_size - size of the blocks to use 2434 . -mat_mpi - use the parallel matrix data structures even on one processor 2435 (defaults to using SeqBAIJ format on one processor) 2436 2437 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2438 MatXXXXSetPreallocation() paradgm instead of this routine directly. 2439 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2440 2441 Notes: 2442 The number of rows and columns must be divisible by blocksize. 2443 This matrix type does not support complex Hermitian operation. 2444 2445 The user MUST specify either the local or global matrix dimensions 2446 (possibly both). 2447 2448 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2449 than it must be used on all processors that share the object for that argument. 2450 2451 If the *_nnz parameter is given then the *_nz parameter is ignored 2452 2453 Storage Information: 2454 For a square global matrix we define each processor's diagonal portion 2455 to be its local rows and the corresponding columns (a square submatrix); 2456 each processor's off-diagonal portion encompasses the remainder of the 2457 local matrix (a rectangular submatrix). 2458 2459 The user can specify preallocated storage for the diagonal part of 2460 the local submatrix with either d_nz or d_nnz (not both). Set 2461 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2462 memory allocation. Likewise, specify preallocated storage for the 2463 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2464 2465 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2466 the figure below we depict these three local rows and all columns (0-11). 2467 2468 .vb 2469 0 1 2 3 4 5 6 7 8 9 10 11 2470 -------------------------- 2471 row 3 |. . . d d d o o o o o o 2472 row 4 |. . . d d d o o o o o o 2473 row 5 |. . . d d d o o o o o o 2474 -------------------------- 2475 .ve 2476 2477 Thus, any entries in the d locations are stored in the d (diagonal) 2478 submatrix, and any entries in the o locations are stored in the 2479 o (off-diagonal) submatrix. Note that the d matrix is stored in 2480 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2481 2482 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2483 plus the diagonal part of the d matrix, 2484 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2485 In general, for PDE problems in which most nonzeros are near the diagonal, 2486 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2487 or you will get TERRIBLE performance; see the users' manual chapter on 2488 matrices. 2489 2490 Level: intermediate 2491 2492 .keywords: matrix, block, aij, compressed row, sparse, parallel 2493 2494 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2495 @*/ 2496 2497 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2498 { 2499 PetscErrorCode ierr; 2500 PetscMPIInt size; 2501 2502 PetscFunctionBegin; 2503 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2504 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2505 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2506 if (size > 1) { 2507 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2508 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2509 } else { 2510 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2511 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2512 } 2513 PetscFunctionReturn(0); 2514 } 2515 2516 2517 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2518 { 2519 Mat mat; 2520 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2521 PetscErrorCode ierr; 2522 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2523 PetscScalar *array; 2524 2525 PetscFunctionBegin; 2526 *newmat = 0; 2527 2528 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2529 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2530 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2531 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2532 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2533 2534 mat->factortype = matin->factortype; 2535 mat->preallocated = PETSC_TRUE; 2536 mat->assembled = PETSC_TRUE; 2537 mat->insertmode = NOT_SET_VALUES; 2538 2539 a = (Mat_MPISBAIJ*)mat->data; 2540 a->bs2 = oldmat->bs2; 2541 a->mbs = oldmat->mbs; 2542 a->nbs = oldmat->nbs; 2543 a->Mbs = oldmat->Mbs; 2544 a->Nbs = oldmat->Nbs; 2545 2546 a->size = oldmat->size; 2547 a->rank = oldmat->rank; 2548 a->donotstash = oldmat->donotstash; 2549 a->roworiented = oldmat->roworiented; 2550 a->rowindices = 0; 2551 a->rowvalues = 0; 2552 a->getrowactive = PETSC_FALSE; 2553 a->barray = 0; 2554 a->rstartbs = oldmat->rstartbs; 2555 a->rendbs = oldmat->rendbs; 2556 a->cstartbs = oldmat->cstartbs; 2557 a->cendbs = oldmat->cendbs; 2558 2559 /* hash table stuff */ 2560 a->ht = 0; 2561 a->hd = 0; 2562 a->ht_size = 0; 2563 a->ht_flag = oldmat->ht_flag; 2564 a->ht_fact = oldmat->ht_fact; 2565 a->ht_total_ct = 0; 2566 a->ht_insert_ct = 0; 2567 2568 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2569 if (oldmat->colmap) { 2570 #if defined(PETSC_USE_CTABLE) 2571 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2572 #else 2573 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 2574 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2575 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2576 #endif 2577 } else a->colmap = 0; 2578 2579 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2580 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2581 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2582 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2583 } else a->garray = 0; 2584 2585 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2586 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2587 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2588 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2589 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2590 2591 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2592 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2593 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2594 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2595 2596 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2597 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2598 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2599 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2600 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2601 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2602 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2603 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2604 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2605 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2606 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 2607 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 2608 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2609 2610 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2611 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2612 a->sMvctx = oldmat->sMvctx; 2613 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 2614 2615 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2616 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2617 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2618 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2619 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2620 *newmat = mat; 2621 PetscFunctionReturn(0); 2622 } 2623 2624 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2625 { 2626 PetscErrorCode ierr; 2627 PetscInt i,nz,j,rstart,rend; 2628 PetscScalar *vals,*buf; 2629 MPI_Comm comm; 2630 MPI_Status status; 2631 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2632 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2633 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2634 PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows; 2635 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2636 PetscInt dcount,kmax,k,nzcount,tmp; 2637 int fd; 2638 PetscBool isbinary; 2639 2640 PetscFunctionBegin; 2641 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2642 if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name); 2643 2644 /* force binary viewer to load .info file if it has not yet done so */ 2645 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2646 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2647 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2648 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2649 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2650 if (bs < 0) bs = 1; 2651 2652 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2653 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2654 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2655 if (!rank) { 2656 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2657 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2658 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2659 } 2660 2661 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2662 M = header[1]; 2663 N = header[2]; 2664 2665 /* If global sizes are set, check if they are consistent with that given in the file */ 2666 if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M); 2667 if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N); 2668 2669 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2670 2671 /* 2672 This code adds extra rows to make sure the number of rows is 2673 divisible by the blocksize 2674 */ 2675 Mbs = M/bs; 2676 extra_rows = bs - M + bs*(Mbs); 2677 if (extra_rows == bs) extra_rows = 0; 2678 else Mbs++; 2679 if (extra_rows &&!rank) { 2680 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2681 } 2682 2683 /* determine ownership of all rows */ 2684 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2685 mbs = Mbs/size + ((Mbs % size) > rank); 2686 m = mbs*bs; 2687 } else { /* User Set */ 2688 m = newmat->rmap->n; 2689 mbs = m/bs; 2690 } 2691 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 2692 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2693 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2694 rowners[0] = 0; 2695 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2696 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2697 rstart = rowners[rank]; 2698 rend = rowners[rank+1]; 2699 2700 /* distribute row lengths to all processors */ 2701 ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 2702 if (!rank) { 2703 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 2704 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2705 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2706 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 2707 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2708 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2709 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2710 } else { 2711 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2712 } 2713 2714 if (!rank) { /* procs[0] */ 2715 /* calculate the number of nonzeros on each processor */ 2716 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 2717 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2718 for (i=0; i<size; i++) { 2719 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2720 procsnz[i] += rowlengths[j]; 2721 } 2722 } 2723 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2724 2725 /* determine max buffer needed and allocate it */ 2726 maxnz = 0; 2727 for (i=0; i<size; i++) { 2728 maxnz = PetscMax(maxnz,procsnz[i]); 2729 } 2730 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2731 2732 /* read in my part of the matrix column indices */ 2733 nz = procsnz[0]; 2734 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2735 mycols = ibuf; 2736 if (size == 1) nz -= extra_rows; 2737 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2738 if (size == 1) { 2739 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 2740 } 2741 2742 /* read in every ones (except the last) and ship off */ 2743 for (i=1; i<size-1; i++) { 2744 nz = procsnz[i]; 2745 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2746 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2747 } 2748 /* read in the stuff for the last proc */ 2749 if (size != 1) { 2750 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2751 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2752 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2753 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2754 } 2755 ierr = PetscFree(cols);CHKERRQ(ierr); 2756 } else { /* procs[i], i>0 */ 2757 /* determine buffer space needed for message */ 2758 nz = 0; 2759 for (i=0; i<m; i++) nz += locrowlens[i]; 2760 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2761 mycols = ibuf; 2762 /* receive message of column indices*/ 2763 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2764 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2765 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2766 } 2767 2768 /* loop over local rows, determining number of off diagonal entries */ 2769 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 2770 ierr = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 2771 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2772 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2773 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2774 rowcount = 0; 2775 nzcount = 0; 2776 for (i=0; i<mbs; i++) { 2777 dcount = 0; 2778 odcount = 0; 2779 for (j=0; j<bs; j++) { 2780 kmax = locrowlens[rowcount]; 2781 for (k=0; k<kmax; k++) { 2782 tmp = mycols[nzcount++]/bs; /* block col. index */ 2783 if (!mask[tmp]) { 2784 mask[tmp] = 1; 2785 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2786 else masked1[dcount++] = tmp; /* entry in diag portion */ 2787 } 2788 } 2789 rowcount++; 2790 } 2791 2792 dlens[i] = dcount; /* d_nzz[i] */ 2793 odlens[i] = odcount; /* o_nzz[i] */ 2794 2795 /* zero out the mask elements we set */ 2796 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2797 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2798 } 2799 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2800 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2801 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2802 2803 if (!rank) { 2804 ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 2805 /* read in my part of the matrix numerical values */ 2806 nz = procsnz[0]; 2807 vals = buf; 2808 mycols = ibuf; 2809 if (size == 1) nz -= extra_rows; 2810 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2811 if (size == 1) { 2812 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 2813 } 2814 2815 /* insert into matrix */ 2816 jj = rstart*bs; 2817 for (i=0; i<m; i++) { 2818 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2819 mycols += locrowlens[i]; 2820 vals += locrowlens[i]; 2821 jj++; 2822 } 2823 2824 /* read in other processors (except the last one) and ship out */ 2825 for (i=1; i<size-1; i++) { 2826 nz = procsnz[i]; 2827 vals = buf; 2828 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2829 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2830 } 2831 /* the last proc */ 2832 if (size != 1) { 2833 nz = procsnz[i] - extra_rows; 2834 vals = buf; 2835 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2836 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2837 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2838 } 2839 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2840 2841 } else { 2842 /* receive numeric values */ 2843 ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 2844 2845 /* receive message of values*/ 2846 vals = buf; 2847 mycols = ibuf; 2848 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2849 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2850 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2851 2852 /* insert into matrix */ 2853 jj = rstart*bs; 2854 for (i=0; i<m; i++) { 2855 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2856 mycols += locrowlens[i]; 2857 vals += locrowlens[i]; 2858 jj++; 2859 } 2860 } 2861 2862 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2863 ierr = PetscFree(buf);CHKERRQ(ierr); 2864 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2865 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2866 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2867 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2868 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2869 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2870 PetscFunctionReturn(0); 2871 } 2872 2873 /*XXXXX@ 2874 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2875 2876 Input Parameters: 2877 . mat - the matrix 2878 . fact - factor 2879 2880 Not Collective on Mat, each process can have a different hash factor 2881 2882 Level: advanced 2883 2884 Notes: 2885 This can also be set by the command line option: -mat_use_hash_table fact 2886 2887 .keywords: matrix, hashtable, factor, HT 2888 2889 .seealso: MatSetOption() 2890 @XXXXX*/ 2891 2892 2893 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2894 { 2895 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2896 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2897 PetscReal atmp; 2898 PetscReal *work,*svalues,*rvalues; 2899 PetscErrorCode ierr; 2900 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2901 PetscMPIInt rank,size; 2902 PetscInt *rowners_bs,dest,count,source; 2903 PetscScalar *va; 2904 MatScalar *ba; 2905 MPI_Status stat; 2906 2907 PetscFunctionBegin; 2908 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2909 ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 2910 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2911 2912 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2913 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2914 2915 bs = A->rmap->bs; 2916 mbs = a->mbs; 2917 Mbs = a->Mbs; 2918 ba = b->a; 2919 bi = b->i; 2920 bj = b->j; 2921 2922 /* find ownerships */ 2923 rowners_bs = A->rmap->range; 2924 2925 /* each proc creates an array to be distributed */ 2926 ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr); 2927 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2928 2929 /* row_max for B */ 2930 if (rank != size-1) { 2931 for (i=0; i<mbs; i++) { 2932 ncols = bi[1] - bi[0]; bi++; 2933 brow = bs*i; 2934 for (j=0; j<ncols; j++) { 2935 bcol = bs*(*bj); 2936 for (kcol=0; kcol<bs; kcol++) { 2937 col = bcol + kcol; /* local col index */ 2938 col += rowners_bs[rank+1]; /* global col index */ 2939 for (krow=0; krow<bs; krow++) { 2940 atmp = PetscAbsScalar(*ba); ba++; 2941 row = brow + krow; /* local row index */ 2942 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2943 if (work[col] < atmp) work[col] = atmp; 2944 } 2945 } 2946 bj++; 2947 } 2948 } 2949 2950 /* send values to its owners */ 2951 for (dest=rank+1; dest<size; dest++) { 2952 svalues = work + rowners_bs[dest]; 2953 count = rowners_bs[dest+1]-rowners_bs[dest]; 2954 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2955 } 2956 } 2957 2958 /* receive values */ 2959 if (rank) { 2960 rvalues = work; 2961 count = rowners_bs[rank+1]-rowners_bs[rank]; 2962 for (source=0; source<rank; source++) { 2963 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 2964 /* process values */ 2965 for (i=0; i<count; i++) { 2966 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2967 } 2968 } 2969 } 2970 2971 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2972 ierr = PetscFree(work);CHKERRQ(ierr); 2973 PetscFunctionReturn(0); 2974 } 2975 2976 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2977 { 2978 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2979 PetscErrorCode ierr; 2980 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2981 PetscScalar *x,*ptr,*from; 2982 Vec bb1; 2983 const PetscScalar *b; 2984 2985 PetscFunctionBegin; 2986 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2987 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2988 2989 if (flag == SOR_APPLY_UPPER) { 2990 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2991 PetscFunctionReturn(0); 2992 } 2993 2994 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2995 if (flag & SOR_ZERO_INITIAL_GUESS) { 2996 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2997 its--; 2998 } 2999 3000 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 3001 while (its--) { 3002 3003 /* lower triangular part: slvec0b = - B^T*xx */ 3004 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 3005 3006 /* copy xx into slvec0a */ 3007 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 3008 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3009 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 3010 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 3011 3012 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 3013 3014 /* copy bb into slvec1a */ 3015 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 3016 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3017 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 3018 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 3019 3020 /* set slvec1b = 0 */ 3021 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 3022 3023 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3024 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3025 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3026 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3027 3028 /* upper triangular part: bb1 = bb1 - B*x */ 3029 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 3030 3031 /* local diagonal sweep */ 3032 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 3033 } 3034 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 3035 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 3036 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3037 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 3038 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3039 } else if (flag & SOR_EISENSTAT) { 3040 Vec xx1; 3041 PetscBool hasop; 3042 const PetscScalar *diag; 3043 PetscScalar *sl,scale = (omega - 2.0)/omega; 3044 PetscInt i,n; 3045 3046 if (!mat->xx1) { 3047 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 3048 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 3049 } 3050 xx1 = mat->xx1; 3051 bb1 = mat->bb1; 3052 3053 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 3054 3055 if (!mat->diag) { 3056 /* this is wrong for same matrix with new nonzero values */ 3057 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 3058 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 3059 } 3060 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 3061 3062 if (hasop) { 3063 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 3064 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 3065 } else { 3066 /* 3067 These two lines are replaced by code that may be a bit faster for a good compiler 3068 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 3069 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 3070 */ 3071 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 3072 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 3073 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3074 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3075 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 3076 if (omega == 1.0) { 3077 for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 3078 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 3079 } else { 3080 for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 3081 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 3082 } 3083 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 3084 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 3085 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3086 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3087 } 3088 3089 /* multiply off-diagonal portion of matrix */ 3090 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 3091 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 3092 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 3093 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3094 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 3095 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 3096 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3097 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3098 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3099 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 3100 3101 /* local sweep */ 3102 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 3103 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 3104 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 3105 PetscFunctionReturn(0); 3106 } 3107 3108 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 3109 { 3110 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 3111 PetscErrorCode ierr; 3112 Vec lvec1,bb1; 3113 3114 PetscFunctionBegin; 3115 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 3116 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 3117 3118 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 3119 if (flag & SOR_ZERO_INITIAL_GUESS) { 3120 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 3121 its--; 3122 } 3123 3124 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 3125 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 3126 while (its--) { 3127 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3128 3129 /* lower diagonal part: bb1 = bb - B^T*xx */ 3130 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 3131 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 3132 3133 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3134 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 3135 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3136 3137 /* upper diagonal part: bb1 = bb1 - B*x */ 3138 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 3139 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 3140 3141 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3142 3143 /* diagonal sweep */ 3144 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 3145 } 3146 ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 3147 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 3148 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 3149 PetscFunctionReturn(0); 3150 } 3151 3152 /*@ 3153 MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard 3154 CSR format the local rows. 3155 3156 Collective on MPI_Comm 3157 3158 Input Parameters: 3159 + comm - MPI communicator 3160 . bs - the block size, only a block size of 1 is supported 3161 . m - number of local rows (Cannot be PETSC_DECIDE) 3162 . n - This value should be the same as the local size used in creating the 3163 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3164 calculated if N is given) For square matrices n is almost always m. 3165 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3166 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3167 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix 3168 . j - column indices 3169 - a - matrix values 3170 3171 Output Parameter: 3172 . mat - the matrix 3173 3174 Level: intermediate 3175 3176 Notes: 3177 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3178 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3179 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3180 3181 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3182 3183 .keywords: matrix, aij, compressed row, sparse, parallel 3184 3185 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3186 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3187 @*/ 3188 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3189 { 3190 PetscErrorCode ierr; 3191 3192 3193 PetscFunctionBegin; 3194 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3195 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3196 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3197 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3198 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 3199 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 3200 PetscFunctionReturn(0); 3201 } 3202 3203 3204 /*@C 3205 MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 3206 (the default parallel PETSc format). 3207 3208 Collective on MPI_Comm 3209 3210 Input Parameters: 3211 + B - the matrix 3212 . bs - the block size 3213 . i - the indices into j for the start of each local row (starts with zero) 3214 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3215 - v - optional values in the matrix 3216 3217 Level: developer 3218 3219 .keywords: matrix, aij, compressed row, sparse, parallel 3220 3221 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 3222 @*/ 3223 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3224 { 3225 PetscErrorCode ierr; 3226 3227 PetscFunctionBegin; 3228 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3229 PetscFunctionReturn(0); 3230 } 3231 3232 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3233 { 3234 PetscErrorCode ierr; 3235 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 3236 PetscInt *indx; 3237 PetscScalar *values; 3238 3239 PetscFunctionBegin; 3240 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3241 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3242 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data; 3243 PetscInt *dnz,*onz,sum,bs,cbs,mbs,Nbs; 3244 PetscInt *bindx,rmax=a->rmax,j; 3245 3246 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3247 mbs = m/bs; Nbs = N/cbs; 3248 if (n == PETSC_DECIDE) { 3249 ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr); 3250 } 3251 /* Check sum(n) = Nbs */ 3252 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3253 if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs); 3254 3255 ierr = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3256 rstart -= mbs; 3257 3258 ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 3259 ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr); 3260 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 3261 for (i=0; i<mbs; i++) { 3262 ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 3263 nnz = nnz/bs; 3264 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 3265 ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 3266 ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 3267 } 3268 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 3269 ierr = PetscFree(bindx);CHKERRQ(ierr); 3270 3271 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3272 ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3273 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3274 ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr); 3275 ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 3276 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3277 } 3278 3279 /* numeric phase */ 3280 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3281 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3282 3283 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 3284 for (i=0; i<m; i++) { 3285 ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3286 Ii = i + rstart; 3287 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3288 ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3289 } 3290 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 3291 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3292 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3293 PetscFunctionReturn(0); 3294 } 3295