1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 #if defined(PETSC_HAVE_ELEMENTAL) 8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 9 #endif 10 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 11 { 12 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 13 PetscErrorCode ierr; 14 15 PetscFunctionBegin; 16 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 17 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 18 PetscFunctionReturn(0); 19 } 20 21 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 22 { 23 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 24 PetscErrorCode ierr; 25 26 PetscFunctionBegin; 27 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 28 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 29 PetscFunctionReturn(0); 30 } 31 32 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 33 { \ 34 \ 35 brow = row/bs; \ 36 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 37 rmax = aimax[brow]; nrow = ailen[brow]; \ 38 bcol = col/bs; \ 39 ridx = row % bs; cidx = col % bs; \ 40 low = 0; high = nrow; \ 41 while (high-low > 3) { \ 42 t = (low+high)/2; \ 43 if (rp[t] > bcol) high = t; \ 44 else low = t; \ 45 } \ 46 for (_i=low; _i<high; _i++) { \ 47 if (rp[_i] > bcol) break; \ 48 if (rp[_i] == bcol) { \ 49 bap = ap + bs2*_i + bs*cidx + ridx; \ 50 if (addv == ADD_VALUES) *bap += value; \ 51 else *bap = value; \ 52 goto a_noinsert; \ 53 } \ 54 } \ 55 if (a->nonew == 1) goto a_noinsert; \ 56 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 57 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 58 N = nrow++ - 1; \ 59 /* shift up all the later entries in this row */ \ 60 for (ii=N; ii>=_i; ii--) { \ 61 rp[ii+1] = rp[ii]; \ 62 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 63 } \ 64 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 65 rp[_i] = bcol; \ 66 ap[bs2*_i + bs*cidx + ridx] = value; \ 67 A->nonzerostate++;\ 68 a_noinsert:; \ 69 ailen[brow] = nrow; \ 70 } 71 72 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 73 { \ 74 brow = row/bs; \ 75 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 76 rmax = bimax[brow]; nrow = bilen[brow]; \ 77 bcol = col/bs; \ 78 ridx = row % bs; cidx = col % bs; \ 79 low = 0; high = nrow; \ 80 while (high-low > 3) { \ 81 t = (low+high)/2; \ 82 if (rp[t] > bcol) high = t; \ 83 else low = t; \ 84 } \ 85 for (_i=low; _i<high; _i++) { \ 86 if (rp[_i] > bcol) break; \ 87 if (rp[_i] == bcol) { \ 88 bap = ap + bs2*_i + bs*cidx + ridx; \ 89 if (addv == ADD_VALUES) *bap += value; \ 90 else *bap = value; \ 91 goto b_noinsert; \ 92 } \ 93 } \ 94 if (b->nonew == 1) goto b_noinsert; \ 95 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 96 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 97 N = nrow++ - 1; \ 98 /* shift up all the later entries in this row */ \ 99 for (ii=N; ii>=_i; ii--) { \ 100 rp[ii+1] = rp[ii]; \ 101 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 102 } \ 103 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 104 rp[_i] = bcol; \ 105 ap[bs2*_i + bs*cidx + ridx] = value; \ 106 B->nonzerostate++;\ 107 b_noinsert:; \ 108 bilen[brow] = nrow; \ 109 } 110 111 /* Only add/insert a(i,j) with i<=j (blocks). 112 Any a(i,j) with i>j input by user is ingored. 113 */ 114 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 115 { 116 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 117 MatScalar value; 118 PetscBool roworiented = baij->roworiented; 119 PetscErrorCode ierr; 120 PetscInt i,j,row,col; 121 PetscInt rstart_orig=mat->rmap->rstart; 122 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 123 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 124 125 /* Some Variables required in the macro */ 126 Mat A = baij->A; 127 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 128 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 129 MatScalar *aa =a->a; 130 131 Mat B = baij->B; 132 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 133 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 134 MatScalar *ba =b->a; 135 136 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 137 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 138 MatScalar *ap,*bap; 139 140 /* for stash */ 141 PetscInt n_loc, *in_loc = NULL; 142 MatScalar *v_loc = NULL; 143 144 PetscFunctionBegin; 145 if (!baij->donotstash) { 146 if (n > baij->n_loc) { 147 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 148 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 149 ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr); 150 ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr); 151 152 baij->n_loc = n; 153 } 154 in_loc = baij->in_loc; 155 v_loc = baij->v_loc; 156 } 157 158 for (i=0; i<m; i++) { 159 if (im[i] < 0) continue; 160 #if defined(PETSC_USE_DEBUG) 161 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 162 #endif 163 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 164 row = im[i] - rstart_orig; /* local row index */ 165 for (j=0; j<n; j++) { 166 if (im[i]/bs > in[j]/bs) { 167 if (a->ignore_ltriangular) { 168 continue; /* ignore lower triangular blocks */ 169 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 170 } 171 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 172 col = in[j] - cstart_orig; /* local col index */ 173 brow = row/bs; bcol = col/bs; 174 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 175 if (roworiented) value = v[i*n+j]; 176 else value = v[i+j*m]; 177 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 178 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 179 } else if (in[j] < 0) continue; 180 #if defined(PETSC_USE_DEBUG) 181 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 182 #endif 183 else { /* off-diag entry (B) */ 184 if (mat->was_assembled) { 185 if (!baij->colmap) { 186 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 187 } 188 #if defined(PETSC_USE_CTABLE) 189 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 190 col = col - 1; 191 #else 192 col = baij->colmap[in[j]/bs] - 1; 193 #endif 194 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 195 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 196 col = in[j]; 197 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 198 B = baij->B; 199 b = (Mat_SeqBAIJ*)(B)->data; 200 bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 201 ba = b->a; 202 } else col += in[j]%bs; 203 } else col = in[j]; 204 if (roworiented) value = v[i*n+j]; 205 else value = v[i+j*m]; 206 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 207 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 208 } 209 } 210 } else { /* off processor entry */ 211 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 212 if (!baij->donotstash) { 213 mat->assembled = PETSC_FALSE; 214 n_loc = 0; 215 for (j=0; j<n; j++) { 216 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 217 in_loc[n_loc] = in[j]; 218 if (roworiented) { 219 v_loc[n_loc] = v[i*n+j]; 220 } else { 221 v_loc[n_loc] = v[j*m+i]; 222 } 223 n_loc++; 224 } 225 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 226 } 227 } 228 } 229 PetscFunctionReturn(0); 230 } 231 232 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 233 { 234 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 235 PetscErrorCode ierr; 236 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 237 PetscInt *imax =a->imax,*ai=a->i,*ailen=a->ilen; 238 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 239 PetscBool roworiented=a->roworiented; 240 const PetscScalar *value = v; 241 MatScalar *ap,*aa = a->a,*bap; 242 243 PetscFunctionBegin; 244 if (col < row) { 245 if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */ 246 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 247 } 248 rp = aj + ai[row]; 249 ap = aa + bs2*ai[row]; 250 rmax = imax[row]; 251 nrow = ailen[row]; 252 value = v; 253 low = 0; 254 high = nrow; 255 256 while (high-low > 7) { 257 t = (low+high)/2; 258 if (rp[t] > col) high = t; 259 else low = t; 260 } 261 for (i=low; i<high; i++) { 262 if (rp[i] > col) break; 263 if (rp[i] == col) { 264 bap = ap + bs2*i; 265 if (roworiented) { 266 if (is == ADD_VALUES) { 267 for (ii=0; ii<bs; ii++) { 268 for (jj=ii; jj<bs2; jj+=bs) { 269 bap[jj] += *value++; 270 } 271 } 272 } else { 273 for (ii=0; ii<bs; ii++) { 274 for (jj=ii; jj<bs2; jj+=bs) { 275 bap[jj] = *value++; 276 } 277 } 278 } 279 } else { 280 if (is == ADD_VALUES) { 281 for (ii=0; ii<bs; ii++) { 282 for (jj=0; jj<bs; jj++) { 283 *bap++ += *value++; 284 } 285 } 286 } else { 287 for (ii=0; ii<bs; ii++) { 288 for (jj=0; jj<bs; jj++) { 289 *bap++ = *value++; 290 } 291 } 292 } 293 } 294 goto noinsert2; 295 } 296 } 297 if (nonew == 1) goto noinsert2; 298 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol); 299 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 300 N = nrow++ - 1; high++; 301 /* shift up all the later entries in this row */ 302 for (ii=N; ii>=i; ii--) { 303 rp[ii+1] = rp[ii]; 304 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 305 } 306 if (N >= i) { 307 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 308 } 309 rp[i] = col; 310 bap = ap + bs2*i; 311 if (roworiented) { 312 for (ii=0; ii<bs; ii++) { 313 for (jj=ii; jj<bs2; jj+=bs) { 314 bap[jj] = *value++; 315 } 316 } 317 } else { 318 for (ii=0; ii<bs; ii++) { 319 for (jj=0; jj<bs; jj++) { 320 *bap++ = *value++; 321 } 322 } 323 } 324 noinsert2:; 325 ailen[row] = nrow; 326 PetscFunctionReturn(0); 327 } 328 329 /* 330 This routine is exactly duplicated in mpibaij.c 331 */ 332 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 333 { 334 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 335 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 336 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 337 PetscErrorCode ierr; 338 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 339 PetscBool roworiented=a->roworiented; 340 const PetscScalar *value = v; 341 MatScalar *ap,*aa = a->a,*bap; 342 343 PetscFunctionBegin; 344 rp = aj + ai[row]; 345 ap = aa + bs2*ai[row]; 346 rmax = imax[row]; 347 nrow = ailen[row]; 348 low = 0; 349 high = nrow; 350 value = v; 351 while (high-low > 7) { 352 t = (low+high)/2; 353 if (rp[t] > col) high = t; 354 else low = t; 355 } 356 for (i=low; i<high; i++) { 357 if (rp[i] > col) break; 358 if (rp[i] == col) { 359 bap = ap + bs2*i; 360 if (roworiented) { 361 if (is == ADD_VALUES) { 362 for (ii=0; ii<bs; ii++) { 363 for (jj=ii; jj<bs2; jj+=bs) { 364 bap[jj] += *value++; 365 } 366 } 367 } else { 368 for (ii=0; ii<bs; ii++) { 369 for (jj=ii; jj<bs2; jj+=bs) { 370 bap[jj] = *value++; 371 } 372 } 373 } 374 } else { 375 if (is == ADD_VALUES) { 376 for (ii=0; ii<bs; ii++,value+=bs) { 377 for (jj=0; jj<bs; jj++) { 378 bap[jj] += value[jj]; 379 } 380 bap += bs; 381 } 382 } else { 383 for (ii=0; ii<bs; ii++,value+=bs) { 384 for (jj=0; jj<bs; jj++) { 385 bap[jj] = value[jj]; 386 } 387 bap += bs; 388 } 389 } 390 } 391 goto noinsert2; 392 } 393 } 394 if (nonew == 1) goto noinsert2; 395 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol); 396 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 397 N = nrow++ - 1; high++; 398 /* shift up all the later entries in this row */ 399 for (ii=N; ii>=i; ii--) { 400 rp[ii+1] = rp[ii]; 401 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 402 } 403 if (N >= i) { 404 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 405 } 406 rp[i] = col; 407 bap = ap + bs2*i; 408 if (roworiented) { 409 for (ii=0; ii<bs; ii++) { 410 for (jj=ii; jj<bs2; jj+=bs) { 411 bap[jj] = *value++; 412 } 413 } 414 } else { 415 for (ii=0; ii<bs; ii++) { 416 for (jj=0; jj<bs; jj++) { 417 *bap++ = *value++; 418 } 419 } 420 } 421 noinsert2:; 422 ailen[row] = nrow; 423 PetscFunctionReturn(0); 424 } 425 426 /* 427 This routine could be optimized by removing the need for the block copy below and passing stride information 428 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 429 */ 430 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 431 { 432 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 433 const MatScalar *value; 434 MatScalar *barray =baij->barray; 435 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 436 PetscErrorCode ierr; 437 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 438 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 439 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 440 441 PetscFunctionBegin; 442 if (!barray) { 443 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 444 baij->barray = barray; 445 } 446 447 if (roworiented) { 448 stepval = (n-1)*bs; 449 } else { 450 stepval = (m-1)*bs; 451 } 452 for (i=0; i<m; i++) { 453 if (im[i] < 0) continue; 454 #if defined(PETSC_USE_DEBUG) 455 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1); 456 #endif 457 if (im[i] >= rstart && im[i] < rend) { 458 row = im[i] - rstart; 459 for (j=0; j<n; j++) { 460 if (im[i] > in[j]) { 461 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 462 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 463 } 464 /* If NumCol = 1 then a copy is not required */ 465 if ((roworiented) && (n == 1)) { 466 barray = (MatScalar*) v + i*bs2; 467 } else if ((!roworiented) && (m == 1)) { 468 barray = (MatScalar*) v + j*bs2; 469 } else { /* Here a copy is required */ 470 if (roworiented) { 471 value = v + i*(stepval+bs)*bs + j*bs; 472 } else { 473 value = v + j*(stepval+bs)*bs + i*bs; 474 } 475 for (ii=0; ii<bs; ii++,value+=stepval) { 476 for (jj=0; jj<bs; jj++) { 477 *barray++ = *value++; 478 } 479 } 480 barray -=bs2; 481 } 482 483 if (in[j] >= cstart && in[j] < cend) { 484 col = in[j] - cstart; 485 ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 486 } else if (in[j] < 0) continue; 487 #if defined(PETSC_USE_DEBUG) 488 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1); 489 #endif 490 else { 491 if (mat->was_assembled) { 492 if (!baij->colmap) { 493 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 494 } 495 496 #if defined(PETSC_USE_DEBUG) 497 #if defined(PETSC_USE_CTABLE) 498 { PetscInt data; 499 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 500 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 501 } 502 #else 503 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 504 #endif 505 #endif 506 #if defined(PETSC_USE_CTABLE) 507 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 508 col = (col - 1)/bs; 509 #else 510 col = (baij->colmap[in[j]] - 1)/bs; 511 #endif 512 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 513 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 514 col = in[j]; 515 } 516 } else col = in[j]; 517 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 518 } 519 } 520 } else { 521 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 522 if (!baij->donotstash) { 523 if (roworiented) { 524 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 525 } else { 526 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 527 } 528 } 529 } 530 } 531 PetscFunctionReturn(0); 532 } 533 534 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 535 { 536 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 537 PetscErrorCode ierr; 538 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 539 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 540 541 PetscFunctionBegin; 542 for (i=0; i<m; i++) { 543 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 544 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 545 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 546 row = idxm[i] - bsrstart; 547 for (j=0; j<n; j++) { 548 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 549 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 550 if (idxn[j] >= bscstart && idxn[j] < bscend) { 551 col = idxn[j] - bscstart; 552 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 553 } else { 554 if (!baij->colmap) { 555 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 556 } 557 #if defined(PETSC_USE_CTABLE) 558 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 559 data--; 560 #else 561 data = baij->colmap[idxn[j]/bs]-1; 562 #endif 563 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 564 else { 565 col = data + idxn[j]%bs; 566 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 567 } 568 } 569 } 570 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 571 } 572 PetscFunctionReturn(0); 573 } 574 575 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 576 { 577 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 578 PetscErrorCode ierr; 579 PetscReal sum[2],*lnorm2; 580 581 PetscFunctionBegin; 582 if (baij->size == 1) { 583 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 584 } else { 585 if (type == NORM_FROBENIUS) { 586 ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr); 587 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 588 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 589 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 590 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 591 ierr = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 592 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 593 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 594 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 595 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 596 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 597 PetscReal *rsum,*rsum2,vabs; 598 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 599 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 600 MatScalar *v; 601 602 ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr); 603 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 604 /* Amat */ 605 v = amat->a; jj = amat->j; 606 for (brow=0; brow<mbs; brow++) { 607 grow = bs*(rstart + brow); 608 nz = amat->i[brow+1] - amat->i[brow]; 609 for (bcol=0; bcol<nz; bcol++) { 610 gcol = bs*(rstart + *jj); jj++; 611 for (col=0; col<bs; col++) { 612 for (row=0; row<bs; row++) { 613 vabs = PetscAbsScalar(*v); v++; 614 rsum[gcol+col] += vabs; 615 /* non-diagonal block */ 616 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 617 } 618 } 619 } 620 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 621 } 622 /* Bmat */ 623 v = bmat->a; jj = bmat->j; 624 for (brow=0; brow<mbs; brow++) { 625 grow = bs*(rstart + brow); 626 nz = bmat->i[brow+1] - bmat->i[brow]; 627 for (bcol=0; bcol<nz; bcol++) { 628 gcol = bs*garray[*jj]; jj++; 629 for (col=0; col<bs; col++) { 630 for (row=0; row<bs; row++) { 631 vabs = PetscAbsScalar(*v); v++; 632 rsum[gcol+col] += vabs; 633 rsum[grow+row] += vabs; 634 } 635 } 636 } 637 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 638 } 639 ierr = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 640 *norm = 0.0; 641 for (col=0; col<mat->cmap->N; col++) { 642 if (rsum2[col] > *norm) *norm = rsum2[col]; 643 } 644 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 645 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 646 } 647 PetscFunctionReturn(0); 648 } 649 650 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 651 { 652 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 653 PetscErrorCode ierr; 654 PetscInt nstash,reallocs; 655 656 PetscFunctionBegin; 657 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 658 659 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 660 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 661 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 662 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 663 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 664 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 665 PetscFunctionReturn(0); 666 } 667 668 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 669 { 670 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 671 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data; 672 PetscErrorCode ierr; 673 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 674 PetscInt *row,*col; 675 PetscBool other_disassembled; 676 PetscMPIInt n; 677 PetscBool r1,r2,r3; 678 MatScalar *val; 679 680 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 681 PetscFunctionBegin; 682 if (!baij->donotstash && !mat->nooffprocentries) { 683 while (1) { 684 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 685 if (!flg) break; 686 687 for (i=0; i<n;) { 688 /* Now identify the consecutive vals belonging to the same row */ 689 for (j=i,rstart=row[j]; j<n; j++) { 690 if (row[j] != rstart) break; 691 } 692 if (j < n) ncols = j-i; 693 else ncols = n-i; 694 /* Now assemble all these values with a single function call */ 695 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 696 i = j; 697 } 698 } 699 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 700 /* Now process the block-stash. Since the values are stashed column-oriented, 701 set the roworiented flag to column oriented, and after MatSetValues() 702 restore the original flags */ 703 r1 = baij->roworiented; 704 r2 = a->roworiented; 705 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 706 707 baij->roworiented = PETSC_FALSE; 708 a->roworiented = PETSC_FALSE; 709 710 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 711 while (1) { 712 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 713 if (!flg) break; 714 715 for (i=0; i<n;) { 716 /* Now identify the consecutive vals belonging to the same row */ 717 for (j=i,rstart=row[j]; j<n; j++) { 718 if (row[j] != rstart) break; 719 } 720 if (j < n) ncols = j-i; 721 else ncols = n-i; 722 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr); 723 i = j; 724 } 725 } 726 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 727 728 baij->roworiented = r1; 729 a->roworiented = r2; 730 731 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 732 } 733 734 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 735 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 736 737 /* determine if any processor has disassembled, if so we must 738 also disassemble ourselfs, in order that we may reassemble. */ 739 /* 740 if nonzero structure of submatrix B cannot change then we know that 741 no processor disassembled thus we can skip this stuff 742 */ 743 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 744 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 745 if (mat->was_assembled && !other_disassembled) { 746 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 747 } 748 } 749 750 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 751 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 752 } 753 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 754 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 755 756 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 757 758 baij->rowvalues = 0; 759 760 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 761 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 762 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 763 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 764 } 765 PetscFunctionReturn(0); 766 } 767 768 extern PetscErrorCode MatView_SeqSBAIJ(Mat,PetscViewer); 769 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 770 #include <petscdraw.h> 771 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 772 { 773 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 774 PetscErrorCode ierr; 775 PetscInt bs = mat->rmap->bs; 776 PetscMPIInt rank = baij->rank; 777 PetscBool iascii,isdraw; 778 PetscViewer sviewer; 779 PetscViewerFormat format; 780 781 PetscFunctionBegin; 782 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 783 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 784 if (iascii) { 785 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 786 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 787 MatInfo info; 788 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 789 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 790 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 791 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 792 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 793 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 794 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 795 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 796 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 797 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 798 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 799 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 800 PetscFunctionReturn(0); 801 } else if (format == PETSC_VIEWER_ASCII_INFO) { 802 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 803 PetscFunctionReturn(0); 804 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 805 PetscFunctionReturn(0); 806 } 807 } 808 809 if (isdraw) { 810 PetscDraw draw; 811 PetscBool isnull; 812 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 813 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 814 if (isnull) PetscFunctionReturn(0); 815 } 816 817 { 818 /* assemble the entire matrix onto first processor. */ 819 Mat A; 820 Mat_SeqSBAIJ *Aloc; 821 Mat_SeqBAIJ *Bloc; 822 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 823 MatScalar *a; 824 const char *matname; 825 826 /* Should this be the same type as mat? */ 827 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 828 if (!rank) { 829 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 830 } else { 831 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 832 } 833 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 834 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 835 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 836 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 837 838 /* copy over the A part */ 839 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 840 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 841 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 842 843 for (i=0; i<mbs; i++) { 844 rvals[0] = bs*(baij->rstartbs + i); 845 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 846 for (j=ai[i]; j<ai[i+1]; j++) { 847 col = (baij->cstartbs+aj[j])*bs; 848 for (k=0; k<bs; k++) { 849 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 850 col++; 851 a += bs; 852 } 853 } 854 } 855 /* copy over the B part */ 856 Bloc = (Mat_SeqBAIJ*)baij->B->data; 857 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 858 for (i=0; i<mbs; i++) { 859 860 rvals[0] = bs*(baij->rstartbs + i); 861 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 862 for (j=ai[i]; j<ai[i+1]; j++) { 863 col = baij->garray[aj[j]]*bs; 864 for (k=0; k<bs; k++) { 865 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 866 col++; 867 a += bs; 868 } 869 } 870 } 871 ierr = PetscFree(rvals);CHKERRQ(ierr); 872 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 873 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 874 /* 875 Everyone has to call to draw the matrix since the graphics waits are 876 synchronized across all processors that share the PetscDraw object 877 */ 878 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 879 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 880 if (!rank) { 881 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 882 ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 883 } 884 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 885 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 886 ierr = MatDestroy(&A);CHKERRQ(ierr); 887 } 888 PetscFunctionReturn(0); 889 } 890 891 static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer) 892 { 893 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)mat->data; 894 Mat_SeqSBAIJ *A = (Mat_SeqSBAIJ*)a->A->data; 895 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data; 896 PetscErrorCode ierr; 897 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 898 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 899 int fd; 900 PetscScalar *column_values; 901 FILE *file; 902 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 903 PetscInt message_count,flowcontrolcount; 904 905 PetscFunctionBegin; 906 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 907 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 908 nz = bs2*(A->nz + B->nz); 909 rlen = mat->rmap->n; 910 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 911 if (!rank) { 912 header[0] = MAT_FILE_CLASSID; 913 header[1] = mat->rmap->N; 914 header[2] = mat->cmap->N; 915 916 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 917 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 918 /* get largest number of rows any processor has */ 919 range = mat->rmap->range; 920 for (i=1; i<size; i++) { 921 rlen = PetscMax(rlen,range[i+1] - range[i]); 922 } 923 } else { 924 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 925 } 926 927 ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr); 928 /* compute lengths of each row */ 929 for (i=0; i<a->mbs; i++) { 930 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 931 } 932 /* store the row lengths to the file */ 933 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 934 if (!rank) { 935 MPI_Status status; 936 ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr); 937 rlen = (range[1] - range[0])/bs; 938 for (i=0; i<rlen; i++) { 939 for (j=0; j<bs; j++) { 940 row_lens[i*bs+j] = bs*crow_lens[i]; 941 } 942 } 943 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 944 for (i=1; i<size; i++) { 945 rlen = (range[i+1] - range[i])/bs; 946 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 947 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 948 for (k=0; k<rlen; k++) { 949 for (j=0; j<bs; j++) { 950 row_lens[k*bs+j] = bs*crow_lens[k]; 951 } 952 } 953 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 954 } 955 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 956 ierr = PetscFree(row_lens);CHKERRQ(ierr); 957 } else { 958 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 959 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 960 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 961 } 962 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 963 964 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 965 information needed to make it for each row from a block row. This does require more communication but still not more than 966 the communication needed for the nonzero values */ 967 nzmax = nz; /* space a largest processor needs */ 968 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 969 ierr = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr); 970 cnt = 0; 971 for (i=0; i<a->mbs; i++) { 972 pcnt = cnt; 973 for (j=B->i[i]; j<B->i[i+1]; j++) { 974 if ((col = garray[B->j[j]]) > cstart) break; 975 for (l=0; l<bs; l++) { 976 column_indices[cnt++] = bs*col+l; 977 } 978 } 979 for (k=A->i[i]; k<A->i[i+1]; k++) { 980 for (l=0; l<bs; l++) { 981 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 982 } 983 } 984 for (; j<B->i[i+1]; j++) { 985 for (l=0; l<bs; l++) { 986 column_indices[cnt++] = bs*garray[B->j[j]]+l; 987 } 988 } 989 len = cnt - pcnt; 990 for (k=1; k<bs; k++) { 991 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 992 cnt += len; 993 } 994 } 995 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 996 997 /* store the columns to the file */ 998 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 999 if (!rank) { 1000 MPI_Status status; 1001 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1002 for (i=1; i<size; i++) { 1003 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1004 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1005 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1006 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1007 } 1008 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1009 } else { 1010 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1011 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1012 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1013 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1014 } 1015 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1016 1017 /* load up the numerical values */ 1018 ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr); 1019 cnt = 0; 1020 for (i=0; i<a->mbs; i++) { 1021 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1022 for (j=B->i[i]; j<B->i[i+1]; j++) { 1023 if (garray[B->j[j]] > cstart) break; 1024 for (l=0; l<bs; l++) { 1025 for (ll=0; ll<bs; ll++) { 1026 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1027 } 1028 } 1029 cnt += bs; 1030 } 1031 for (k=A->i[i]; k<A->i[i+1]; k++) { 1032 for (l=0; l<bs; l++) { 1033 for (ll=0; ll<bs; ll++) { 1034 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1035 } 1036 } 1037 cnt += bs; 1038 } 1039 for (; j<B->i[i+1]; j++) { 1040 for (l=0; l<bs; l++) { 1041 for (ll=0; ll<bs; ll++) { 1042 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1043 } 1044 } 1045 cnt += bs; 1046 } 1047 cnt += (bs-1)*rlen; 1048 } 1049 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1050 1051 /* store the column values to the file */ 1052 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1053 if (!rank) { 1054 MPI_Status status; 1055 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1056 for (i=1; i<size; i++) { 1057 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1058 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1059 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1060 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1061 } 1062 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1063 } else { 1064 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1065 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1066 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1067 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1068 } 1069 ierr = PetscFree(column_values);CHKERRQ(ierr); 1070 1071 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1072 if (file) { 1073 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1074 } 1075 PetscFunctionReturn(0); 1076 } 1077 1078 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 1079 { 1080 PetscErrorCode ierr; 1081 PetscBool iascii,isdraw,issocket,isbinary; 1082 1083 PetscFunctionBegin; 1084 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1085 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1086 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1087 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1088 if (iascii || isdraw || issocket) { 1089 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1090 } else if (isbinary) { 1091 ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1092 } 1093 PetscFunctionReturn(0); 1094 } 1095 1096 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 1097 { 1098 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1099 PetscErrorCode ierr; 1100 1101 PetscFunctionBegin; 1102 #if defined(PETSC_USE_LOG) 1103 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1104 #endif 1105 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1106 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1107 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1108 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1109 #if defined(PETSC_USE_CTABLE) 1110 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1111 #else 1112 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1113 #endif 1114 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1115 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1116 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1117 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 1118 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 1119 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 1120 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 1121 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 1122 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 1123 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1124 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1125 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 1126 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 1127 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 1128 ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 1129 #if defined(PETSC_USE_REAL_MAT_SINGLE) 1130 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 1131 #endif 1132 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 1133 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 1134 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1135 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1136 1137 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1138 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1139 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1140 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1141 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr); 1142 #if defined(PETSC_HAVE_ELEMENTAL) 1143 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr); 1144 #endif 1145 PetscFunctionReturn(0); 1146 } 1147 1148 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 1149 { 1150 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1151 PetscErrorCode ierr; 1152 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 1153 PetscScalar *from; 1154 const PetscScalar *x; 1155 1156 PetscFunctionBegin; 1157 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1158 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1159 1160 /* diagonal part */ 1161 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1162 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1163 1164 /* subdiagonal part */ 1165 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1166 1167 /* copy x into the vec slvec0 */ 1168 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1169 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1170 1171 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1172 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1173 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1174 1175 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1176 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1177 /* supperdiagonal part */ 1178 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1179 PetscFunctionReturn(0); 1180 } 1181 1182 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 1183 { 1184 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1185 PetscErrorCode ierr; 1186 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 1187 PetscScalar *from; 1188 const PetscScalar *x; 1189 1190 PetscFunctionBegin; 1191 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1192 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1193 1194 /* diagonal part */ 1195 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1196 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1197 1198 /* subdiagonal part */ 1199 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1200 1201 /* copy x into the vec slvec0 */ 1202 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1203 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1204 1205 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1206 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1207 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1208 1209 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1210 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1211 /* supperdiagonal part */ 1212 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1213 PetscFunctionReturn(0); 1214 } 1215 1216 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 1217 { 1218 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1219 PetscErrorCode ierr; 1220 PetscInt nt; 1221 1222 PetscFunctionBegin; 1223 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1224 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1225 1226 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1227 if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1228 1229 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1230 /* do diagonal part */ 1231 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1232 /* do supperdiagonal part */ 1233 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1234 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1235 /* do subdiagonal part */ 1236 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1237 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1238 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1239 PetscFunctionReturn(0); 1240 } 1241 1242 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1243 { 1244 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1245 PetscErrorCode ierr; 1246 PetscInt mbs=a->mbs,bs=A->rmap->bs; 1247 PetscScalar *from,zero=0.0; 1248 const PetscScalar *x; 1249 1250 PetscFunctionBegin; 1251 /* 1252 PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n"); 1253 PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT); 1254 */ 1255 /* diagonal part */ 1256 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1257 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 1258 1259 /* subdiagonal part */ 1260 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1261 1262 /* copy x into the vec slvec0 */ 1263 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1264 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1265 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1266 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1267 1268 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1269 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1270 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1271 1272 /* supperdiagonal part */ 1273 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1274 PetscFunctionReturn(0); 1275 } 1276 1277 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1278 { 1279 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1280 PetscErrorCode ierr; 1281 1282 PetscFunctionBegin; 1283 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1284 /* do diagonal part */ 1285 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1286 /* do supperdiagonal part */ 1287 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1288 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1289 1290 /* do subdiagonal part */ 1291 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1292 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1293 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1294 PetscFunctionReturn(0); 1295 } 1296 1297 /* 1298 This only works correctly for square matrices where the subblock A->A is the 1299 diagonal block 1300 */ 1301 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1302 { 1303 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1304 PetscErrorCode ierr; 1305 1306 PetscFunctionBegin; 1307 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1308 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1309 PetscFunctionReturn(0); 1310 } 1311 1312 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1313 { 1314 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1315 PetscErrorCode ierr; 1316 1317 PetscFunctionBegin; 1318 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1319 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1320 PetscFunctionReturn(0); 1321 } 1322 1323 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1324 { 1325 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1326 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1327 PetscErrorCode ierr; 1328 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1329 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1330 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1331 1332 PetscFunctionBegin; 1333 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1334 mat->getrowactive = PETSC_TRUE; 1335 1336 if (!mat->rowvalues && (idx || v)) { 1337 /* 1338 allocate enough space to hold information from the longest row. 1339 */ 1340 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1341 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1342 PetscInt max = 1,mbs = mat->mbs,tmp; 1343 for (i=0; i<mbs; i++) { 1344 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1345 if (max < tmp) max = tmp; 1346 } 1347 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1348 } 1349 1350 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1351 lrow = row - brstart; /* local row index */ 1352 1353 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1354 if (!v) {pvA = 0; pvB = 0;} 1355 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1356 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1357 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1358 nztot = nzA + nzB; 1359 1360 cmap = mat->garray; 1361 if (v || idx) { 1362 if (nztot) { 1363 /* Sort by increasing column numbers, assuming A and B already sorted */ 1364 PetscInt imark = -1; 1365 if (v) { 1366 *v = v_p = mat->rowvalues; 1367 for (i=0; i<nzB; i++) { 1368 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1369 else break; 1370 } 1371 imark = i; 1372 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1373 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1374 } 1375 if (idx) { 1376 *idx = idx_p = mat->rowindices; 1377 if (imark > -1) { 1378 for (i=0; i<imark; i++) { 1379 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1380 } 1381 } else { 1382 for (i=0; i<nzB; i++) { 1383 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1384 else break; 1385 } 1386 imark = i; 1387 } 1388 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1389 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1390 } 1391 } else { 1392 if (idx) *idx = 0; 1393 if (v) *v = 0; 1394 } 1395 } 1396 *nz = nztot; 1397 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1398 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1399 PetscFunctionReturn(0); 1400 } 1401 1402 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1403 { 1404 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1405 1406 PetscFunctionBegin; 1407 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1408 baij->getrowactive = PETSC_FALSE; 1409 PetscFunctionReturn(0); 1410 } 1411 1412 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1413 { 1414 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1415 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1416 1417 PetscFunctionBegin; 1418 aA->getrow_utriangular = PETSC_TRUE; 1419 PetscFunctionReturn(0); 1420 } 1421 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1422 { 1423 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1424 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1425 1426 PetscFunctionBegin; 1427 aA->getrow_utriangular = PETSC_FALSE; 1428 PetscFunctionReturn(0); 1429 } 1430 1431 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1432 { 1433 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1434 PetscErrorCode ierr; 1435 1436 PetscFunctionBegin; 1437 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1438 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1439 PetscFunctionReturn(0); 1440 } 1441 1442 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1443 { 1444 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1445 PetscErrorCode ierr; 1446 1447 PetscFunctionBegin; 1448 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1449 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1450 PetscFunctionReturn(0); 1451 } 1452 1453 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ() 1454 Input: isrow - distributed(parallel), 1455 iscol_local - locally owned (seq) 1456 */ 1457 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg) 1458 { 1459 PetscErrorCode ierr; 1460 PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch; 1461 const PetscInt *ptr1,*ptr2; 1462 1463 PetscFunctionBegin; 1464 ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr); 1465 ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr); 1466 if (sz1 > sz2) { 1467 *flg = PETSC_FALSE; 1468 PetscFunctionReturn(0); 1469 } 1470 1471 ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr); 1472 ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1473 1474 ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr); 1475 ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr); 1476 ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr); 1477 ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr); 1478 ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr); 1479 ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr); 1480 1481 nmatch=0; 1482 k = 0; 1483 for (i=0; i<sz1; i++){ 1484 for (j=k; j<sz2; j++){ 1485 if (a1[i] == a2[j]) { 1486 k = j; nmatch++; 1487 break; 1488 } 1489 } 1490 } 1491 ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr); 1492 ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1493 ierr = PetscFree(a1);CHKERRQ(ierr); 1494 ierr = PetscFree(a2);CHKERRQ(ierr); 1495 if (nmatch < sz1) { 1496 *flg = PETSC_FALSE; 1497 } else { 1498 *flg = PETSC_TRUE; 1499 } 1500 PetscFunctionReturn(0); 1501 } 1502 1503 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1504 { 1505 PetscErrorCode ierr; 1506 IS iscol_local; 1507 PetscInt csize; 1508 PetscBool isequal; 1509 1510 PetscFunctionBegin; 1511 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1512 if (call == MAT_REUSE_MATRIX) { 1513 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1514 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1515 } else { 1516 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1517 ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr); 1518 if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow"); 1519 } 1520 1521 /* now call MatCreateSubMatrix_MPIBAIJ() */ 1522 ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1523 if (call == MAT_INITIAL_MATRIX) { 1524 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1525 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1526 } 1527 PetscFunctionReturn(0); 1528 } 1529 1530 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1531 { 1532 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1533 PetscErrorCode ierr; 1534 1535 PetscFunctionBegin; 1536 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1537 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1538 PetscFunctionReturn(0); 1539 } 1540 1541 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1542 { 1543 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1544 Mat A = a->A,B = a->B; 1545 PetscErrorCode ierr; 1546 PetscReal isend[5],irecv[5]; 1547 1548 PetscFunctionBegin; 1549 info->block_size = (PetscReal)matin->rmap->bs; 1550 1551 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1552 1553 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1554 isend[3] = info->memory; isend[4] = info->mallocs; 1555 1556 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1557 1558 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1559 isend[3] += info->memory; isend[4] += info->mallocs; 1560 if (flag == MAT_LOCAL) { 1561 info->nz_used = isend[0]; 1562 info->nz_allocated = isend[1]; 1563 info->nz_unneeded = isend[2]; 1564 info->memory = isend[3]; 1565 info->mallocs = isend[4]; 1566 } else if (flag == MAT_GLOBAL_MAX) { 1567 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1568 1569 info->nz_used = irecv[0]; 1570 info->nz_allocated = irecv[1]; 1571 info->nz_unneeded = irecv[2]; 1572 info->memory = irecv[3]; 1573 info->mallocs = irecv[4]; 1574 } else if (flag == MAT_GLOBAL_SUM) { 1575 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1576 1577 info->nz_used = irecv[0]; 1578 info->nz_allocated = irecv[1]; 1579 info->nz_unneeded = irecv[2]; 1580 info->memory = irecv[3]; 1581 info->mallocs = irecv[4]; 1582 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1583 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1584 info->fill_ratio_needed = 0; 1585 info->factor_mallocs = 0; 1586 PetscFunctionReturn(0); 1587 } 1588 1589 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1590 { 1591 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1592 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1593 PetscErrorCode ierr; 1594 1595 PetscFunctionBegin; 1596 switch (op) { 1597 case MAT_NEW_NONZERO_LOCATIONS: 1598 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1599 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1600 case MAT_KEEP_NONZERO_PATTERN: 1601 case MAT_SUBMAT_SINGLEIS: 1602 case MAT_NEW_NONZERO_LOCATION_ERR: 1603 MatCheckPreallocated(A,1); 1604 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1605 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1606 break; 1607 case MAT_ROW_ORIENTED: 1608 MatCheckPreallocated(A,1); 1609 a->roworiented = flg; 1610 1611 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1612 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1613 break; 1614 case MAT_NEW_DIAGONALS: 1615 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1616 break; 1617 case MAT_IGNORE_OFF_PROC_ENTRIES: 1618 a->donotstash = flg; 1619 break; 1620 case MAT_USE_HASH_TABLE: 1621 a->ht_flag = flg; 1622 break; 1623 case MAT_HERMITIAN: 1624 MatCheckPreallocated(A,1); 1625 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1626 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1627 1628 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1629 break; 1630 case MAT_SPD: 1631 A->spd_set = PETSC_TRUE; 1632 A->spd = flg; 1633 if (flg) { 1634 A->symmetric = PETSC_TRUE; 1635 A->structurally_symmetric = PETSC_TRUE; 1636 A->symmetric_set = PETSC_TRUE; 1637 A->structurally_symmetric_set = PETSC_TRUE; 1638 } 1639 break; 1640 case MAT_SYMMETRIC: 1641 MatCheckPreallocated(A,1); 1642 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1643 break; 1644 case MAT_STRUCTURALLY_SYMMETRIC: 1645 MatCheckPreallocated(A,1); 1646 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1647 break; 1648 case MAT_SYMMETRY_ETERNAL: 1649 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1650 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1651 break; 1652 case MAT_IGNORE_LOWER_TRIANGULAR: 1653 aA->ignore_ltriangular = flg; 1654 break; 1655 case MAT_ERROR_LOWER_TRIANGULAR: 1656 aA->ignore_ltriangular = flg; 1657 break; 1658 case MAT_GETROW_UPPERTRIANGULAR: 1659 aA->getrow_utriangular = flg; 1660 break; 1661 default: 1662 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1663 } 1664 PetscFunctionReturn(0); 1665 } 1666 1667 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1668 { 1669 PetscErrorCode ierr; 1670 1671 PetscFunctionBegin; 1672 if (reuse == MAT_INITIAL_MATRIX) { 1673 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1674 } else if (reuse == MAT_REUSE_MATRIX) { 1675 ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1676 } 1677 PetscFunctionReturn(0); 1678 } 1679 1680 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1681 { 1682 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1683 Mat a = baij->A, b=baij->B; 1684 PetscErrorCode ierr; 1685 PetscInt nv,m,n; 1686 PetscBool flg; 1687 1688 PetscFunctionBegin; 1689 if (ll != rr) { 1690 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1691 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1692 } 1693 if (!ll) PetscFunctionReturn(0); 1694 1695 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1696 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1697 1698 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1699 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1700 1701 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1702 1703 /* left diagonalscale the off-diagonal part */ 1704 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1705 1706 /* scale the diagonal part */ 1707 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1708 1709 /* right diagonalscale the off-diagonal part */ 1710 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1711 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1712 PetscFunctionReturn(0); 1713 } 1714 1715 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1716 { 1717 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1718 PetscErrorCode ierr; 1719 1720 PetscFunctionBegin; 1721 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1722 PetscFunctionReturn(0); 1723 } 1724 1725 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1726 1727 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1728 { 1729 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1730 Mat a,b,c,d; 1731 PetscBool flg; 1732 PetscErrorCode ierr; 1733 1734 PetscFunctionBegin; 1735 a = matA->A; b = matA->B; 1736 c = matB->A; d = matB->B; 1737 1738 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1739 if (flg) { 1740 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1741 } 1742 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1743 PetscFunctionReturn(0); 1744 } 1745 1746 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1747 { 1748 PetscErrorCode ierr; 1749 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1750 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1751 1752 PetscFunctionBegin; 1753 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1754 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1755 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1756 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1757 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1758 } else { 1759 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1760 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1761 } 1762 PetscFunctionReturn(0); 1763 } 1764 1765 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1766 { 1767 PetscErrorCode ierr; 1768 1769 PetscFunctionBegin; 1770 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1771 PetscFunctionReturn(0); 1772 } 1773 1774 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1775 { 1776 PetscErrorCode ierr; 1777 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1778 PetscBLASInt bnz,one=1; 1779 Mat_SeqSBAIJ *xa,*ya; 1780 Mat_SeqBAIJ *xb,*yb; 1781 1782 PetscFunctionBegin; 1783 if (str == SAME_NONZERO_PATTERN) { 1784 PetscScalar alpha = a; 1785 xa = (Mat_SeqSBAIJ*)xx->A->data; 1786 ya = (Mat_SeqSBAIJ*)yy->A->data; 1787 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1788 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1789 xb = (Mat_SeqBAIJ*)xx->B->data; 1790 yb = (Mat_SeqBAIJ*)yy->B->data; 1791 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1792 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1793 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1794 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1795 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1796 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1797 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 1798 } else { 1799 Mat B; 1800 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1801 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1802 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1803 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 1804 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1805 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1806 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1807 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1808 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1809 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1810 ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 1811 ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1812 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1813 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1814 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1815 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 1816 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1817 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1818 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1819 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1820 } 1821 PetscFunctionReturn(0); 1822 } 1823 1824 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1825 { 1826 PetscErrorCode ierr; 1827 PetscInt i; 1828 PetscBool flg; 1829 1830 PetscFunctionBegin; 1831 ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */ 1832 for (i=0; i<n; i++) { 1833 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1834 if (!flg) { 1835 ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr); 1836 } 1837 } 1838 PetscFunctionReturn(0); 1839 } 1840 1841 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a) 1842 { 1843 PetscErrorCode ierr; 1844 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data; 1845 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data; 1846 1847 PetscFunctionBegin; 1848 if (!Y->preallocated) { 1849 ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 1850 } else if (!aij->nz) { 1851 PetscInt nonew = aij->nonew; 1852 ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 1853 aij->nonew = nonew; 1854 } 1855 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1856 PetscFunctionReturn(0); 1857 } 1858 1859 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1860 { 1861 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1862 PetscErrorCode ierr; 1863 1864 PetscFunctionBegin; 1865 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 1866 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 1867 if (d) { 1868 PetscInt rstart; 1869 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 1870 *d += rstart/A->rmap->bs; 1871 1872 } 1873 PetscFunctionReturn(0); 1874 } 1875 1876 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1877 { 1878 PetscFunctionBegin; 1879 *a = ((Mat_MPISBAIJ*)A->data)->A; 1880 PetscFunctionReturn(0); 1881 } 1882 1883 /* -------------------------------------------------------------------*/ 1884 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1885 MatGetRow_MPISBAIJ, 1886 MatRestoreRow_MPISBAIJ, 1887 MatMult_MPISBAIJ, 1888 /* 4*/ MatMultAdd_MPISBAIJ, 1889 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1890 MatMultAdd_MPISBAIJ, 1891 0, 1892 0, 1893 0, 1894 /* 10*/ 0, 1895 0, 1896 0, 1897 MatSOR_MPISBAIJ, 1898 MatTranspose_MPISBAIJ, 1899 /* 15*/ MatGetInfo_MPISBAIJ, 1900 MatEqual_MPISBAIJ, 1901 MatGetDiagonal_MPISBAIJ, 1902 MatDiagonalScale_MPISBAIJ, 1903 MatNorm_MPISBAIJ, 1904 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1905 MatAssemblyEnd_MPISBAIJ, 1906 MatSetOption_MPISBAIJ, 1907 MatZeroEntries_MPISBAIJ, 1908 /* 24*/ 0, 1909 0, 1910 0, 1911 0, 1912 0, 1913 /* 29*/ MatSetUp_MPISBAIJ, 1914 0, 1915 0, 1916 MatGetDiagonalBlock_MPISBAIJ, 1917 0, 1918 /* 34*/ MatDuplicate_MPISBAIJ, 1919 0, 1920 0, 1921 0, 1922 0, 1923 /* 39*/ MatAXPY_MPISBAIJ, 1924 MatCreateSubMatrices_MPISBAIJ, 1925 MatIncreaseOverlap_MPISBAIJ, 1926 MatGetValues_MPISBAIJ, 1927 MatCopy_MPISBAIJ, 1928 /* 44*/ 0, 1929 MatScale_MPISBAIJ, 1930 MatShift_MPISBAIJ, 1931 0, 1932 0, 1933 /* 49*/ 0, 1934 0, 1935 0, 1936 0, 1937 0, 1938 /* 54*/ 0, 1939 0, 1940 MatSetUnfactored_MPISBAIJ, 1941 0, 1942 MatSetValuesBlocked_MPISBAIJ, 1943 /* 59*/ MatCreateSubMatrix_MPISBAIJ, 1944 0, 1945 0, 1946 0, 1947 0, 1948 /* 64*/ 0, 1949 0, 1950 0, 1951 0, 1952 0, 1953 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1954 0, 1955 0, 1956 0, 1957 0, 1958 /* 74*/ 0, 1959 0, 1960 0, 1961 0, 1962 0, 1963 /* 79*/ 0, 1964 0, 1965 0, 1966 0, 1967 MatLoad_MPISBAIJ, 1968 /* 84*/ 0, 1969 0, 1970 0, 1971 0, 1972 0, 1973 /* 89*/ 0, 1974 0, 1975 0, 1976 0, 1977 0, 1978 /* 94*/ 0, 1979 0, 1980 0, 1981 0, 1982 0, 1983 /* 99*/ 0, 1984 0, 1985 0, 1986 0, 1987 0, 1988 /*104*/ 0, 1989 MatRealPart_MPISBAIJ, 1990 MatImaginaryPart_MPISBAIJ, 1991 MatGetRowUpperTriangular_MPISBAIJ, 1992 MatRestoreRowUpperTriangular_MPISBAIJ, 1993 /*109*/ 0, 1994 0, 1995 0, 1996 0, 1997 MatMissingDiagonal_MPISBAIJ, 1998 /*114*/ 0, 1999 0, 2000 0, 2001 0, 2002 0, 2003 /*119*/ 0, 2004 0, 2005 0, 2006 0, 2007 0, 2008 /*124*/ 0, 2009 0, 2010 0, 2011 0, 2012 0, 2013 /*129*/ 0, 2014 0, 2015 0, 2016 0, 2017 0, 2018 /*134*/ 0, 2019 0, 2020 0, 2021 0, 2022 0, 2023 /*139*/ MatSetBlockSizes_Default, 2024 0, 2025 0, 2026 0, 2027 0, 2028 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 2029 }; 2030 2031 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2032 { 2033 Mat_MPISBAIJ *b; 2034 PetscErrorCode ierr; 2035 PetscInt i,mbs,Mbs; 2036 2037 PetscFunctionBegin; 2038 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2039 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2040 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2041 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2042 2043 b = (Mat_MPISBAIJ*)B->data; 2044 mbs = B->rmap->n/bs; 2045 Mbs = B->rmap->N/bs; 2046 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 2047 2048 B->rmap->bs = bs; 2049 b->bs2 = bs*bs; 2050 b->mbs = mbs; 2051 b->Mbs = Mbs; 2052 b->nbs = B->cmap->n/bs; 2053 b->Nbs = B->cmap->N/bs; 2054 2055 for (i=0; i<=b->size; i++) { 2056 b->rangebs[i] = B->rmap->range[i]/bs; 2057 } 2058 b->rstartbs = B->rmap->rstart/bs; 2059 b->rendbs = B->rmap->rend/bs; 2060 2061 b->cstartbs = B->cmap->rstart/bs; 2062 b->cendbs = B->cmap->rend/bs; 2063 2064 #if defined(PETSC_USE_CTABLE) 2065 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2066 #else 2067 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2068 #endif 2069 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2070 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2071 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2072 ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr); 2073 ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr); 2074 ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr); 2075 ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr); 2076 ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr); 2077 ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr); 2078 2079 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2080 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2081 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2082 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2083 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2084 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2085 2086 if (!B->preallocated) { 2087 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2088 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2089 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 2090 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2091 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2092 } 2093 2094 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2095 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2096 2097 B->preallocated = PETSC_TRUE; 2098 B->was_assembled = PETSC_FALSE; 2099 B->assembled = PETSC_FALSE; 2100 PetscFunctionReturn(0); 2101 } 2102 2103 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2104 { 2105 PetscInt m,rstart,cstart,cend; 2106 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2107 const PetscInt *JJ =0; 2108 PetscScalar *values=0; 2109 PetscErrorCode ierr; 2110 2111 PetscFunctionBegin; 2112 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2113 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2114 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2115 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2116 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2117 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2118 m = B->rmap->n/bs; 2119 rstart = B->rmap->rstart/bs; 2120 cstart = B->cmap->rstart/bs; 2121 cend = B->cmap->rend/bs; 2122 2123 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2124 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2125 for (i=0; i<m; i++) { 2126 nz = ii[i+1] - ii[i]; 2127 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2128 nz_max = PetscMax(nz_max,nz); 2129 JJ = jj + ii[i]; 2130 for (j=0; j<nz; j++) { 2131 if (*JJ >= cstart) break; 2132 JJ++; 2133 } 2134 d = 0; 2135 for (; j<nz; j++) { 2136 if (*JJ++ >= cend) break; 2137 d++; 2138 } 2139 d_nnz[i] = d; 2140 o_nnz[i] = nz - d; 2141 } 2142 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2143 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2144 2145 values = (PetscScalar*)V; 2146 if (!values) { 2147 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2148 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2149 } 2150 for (i=0; i<m; i++) { 2151 PetscInt row = i + rstart; 2152 PetscInt ncols = ii[i+1] - ii[i]; 2153 const PetscInt *icols = jj + ii[i]; 2154 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2155 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2156 } 2157 2158 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2159 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2160 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2161 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2162 PetscFunctionReturn(0); 2163 } 2164 2165 /*MC 2166 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2167 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2168 the matrix is stored. 2169 2170 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2171 can call MatSetOption(Mat, MAT_HERMITIAN); 2172 2173 Options Database Keys: 2174 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 2175 2176 Level: beginner 2177 2178 .seealso: MatCreateMPISBAIJ 2179 M*/ 2180 2181 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*); 2182 2183 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2184 { 2185 Mat_MPISBAIJ *b; 2186 PetscErrorCode ierr; 2187 PetscBool flg = PETSC_FALSE; 2188 2189 PetscFunctionBegin; 2190 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2191 B->data = (void*)b; 2192 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2193 2194 B->ops->destroy = MatDestroy_MPISBAIJ; 2195 B->ops->view = MatView_MPISBAIJ; 2196 B->assembled = PETSC_FALSE; 2197 B->insertmode = NOT_SET_VALUES; 2198 2199 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2200 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2201 2202 /* build local table of row and column ownerships */ 2203 ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 2204 2205 /* build cache for off array entries formed */ 2206 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2207 2208 b->donotstash = PETSC_FALSE; 2209 b->colmap = NULL; 2210 b->garray = NULL; 2211 b->roworiented = PETSC_TRUE; 2212 2213 /* stuff used in block assembly */ 2214 b->barray = 0; 2215 2216 /* stuff used for matrix vector multiply */ 2217 b->lvec = 0; 2218 b->Mvctx = 0; 2219 b->slvec0 = 0; 2220 b->slvec0b = 0; 2221 b->slvec1 = 0; 2222 b->slvec1a = 0; 2223 b->slvec1b = 0; 2224 b->sMvctx = 0; 2225 2226 /* stuff for MatGetRow() */ 2227 b->rowindices = 0; 2228 b->rowvalues = 0; 2229 b->getrowactive = PETSC_FALSE; 2230 2231 /* hash table stuff */ 2232 b->ht = 0; 2233 b->hd = 0; 2234 b->ht_size = 0; 2235 b->ht_flag = PETSC_FALSE; 2236 b->ht_fact = 0; 2237 b->ht_total_ct = 0; 2238 b->ht_insert_ct = 0; 2239 2240 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2241 b->ijonly = PETSC_FALSE; 2242 2243 b->in_loc = 0; 2244 b->v_loc = 0; 2245 b->n_loc = 0; 2246 2247 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 2248 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 2249 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 2250 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 2251 #if defined(PETSC_HAVE_ELEMENTAL) 2252 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 2253 #endif 2254 2255 B->symmetric = PETSC_TRUE; 2256 B->structurally_symmetric = PETSC_TRUE; 2257 B->symmetric_set = PETSC_TRUE; 2258 B->structurally_symmetric_set = PETSC_TRUE; 2259 2260 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 2261 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 2262 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 2263 if (flg) { 2264 PetscReal fact = 1.39; 2265 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2266 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 2267 if (fact <= 1.0) fact = 1.39; 2268 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2269 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2270 } 2271 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2272 PetscFunctionReturn(0); 2273 } 2274 2275 /*MC 2276 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2277 2278 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 2279 and MATMPISBAIJ otherwise. 2280 2281 Options Database Keys: 2282 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2283 2284 Level: beginner 2285 2286 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 2287 M*/ 2288 2289 /*@C 2290 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2291 the user should preallocate the matrix storage by setting the parameters 2292 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2293 performance can be increased by more than a factor of 50. 2294 2295 Collective on Mat 2296 2297 Input Parameters: 2298 + B - the matrix 2299 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2300 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2301 . d_nz - number of block nonzeros per block row in diagonal portion of local 2302 submatrix (same for all local rows) 2303 . d_nnz - array containing the number of block nonzeros in the various block rows 2304 in the upper triangular and diagonal part of the in diagonal portion of the local 2305 (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 2306 for the diagonal entry and set a value even if it is zero. 2307 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2308 submatrix (same for all local rows). 2309 - o_nnz - array containing the number of nonzeros in the various block rows of the 2310 off-diagonal portion of the local submatrix that is right of the diagonal 2311 (possibly different for each block row) or NULL. 2312 2313 2314 Options Database Keys: 2315 . -mat_no_unroll - uses code that does not unroll the loops in the 2316 block calculations (much slower) 2317 . -mat_block_size - size of the blocks to use 2318 2319 Notes: 2320 2321 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2322 than it must be used on all processors that share the object for that argument. 2323 2324 If the *_nnz parameter is given then the *_nz parameter is ignored 2325 2326 Storage Information: 2327 For a square global matrix we define each processor's diagonal portion 2328 to be its local rows and the corresponding columns (a square submatrix); 2329 each processor's off-diagonal portion encompasses the remainder of the 2330 local matrix (a rectangular submatrix). 2331 2332 The user can specify preallocated storage for the diagonal part of 2333 the local submatrix with either d_nz or d_nnz (not both). Set 2334 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2335 memory allocation. Likewise, specify preallocated storage for the 2336 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2337 2338 You can call MatGetInfo() to get information on how effective the preallocation was; 2339 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2340 You can also run with the option -info and look for messages with the string 2341 malloc in them to see if additional memory allocation was needed. 2342 2343 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2344 the figure below we depict these three local rows and all columns (0-11). 2345 2346 .vb 2347 0 1 2 3 4 5 6 7 8 9 10 11 2348 -------------------------- 2349 row 3 |. . . d d d o o o o o o 2350 row 4 |. . . d d d o o o o o o 2351 row 5 |. . . d d d o o o o o o 2352 -------------------------- 2353 .ve 2354 2355 Thus, any entries in the d locations are stored in the d (diagonal) 2356 submatrix, and any entries in the o locations are stored in the 2357 o (off-diagonal) submatrix. Note that the d matrix is stored in 2358 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2359 2360 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2361 plus the diagonal part of the d matrix, 2362 and o_nz should indicate the number of block nonzeros per row in the o matrix 2363 2364 In general, for PDE problems in which most nonzeros are near the diagonal, 2365 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2366 or you will get TERRIBLE performance; see the users' manual chapter on 2367 matrices. 2368 2369 Level: intermediate 2370 2371 .keywords: matrix, block, aij, compressed row, sparse, parallel 2372 2373 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 2374 @*/ 2375 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2376 { 2377 PetscErrorCode ierr; 2378 2379 PetscFunctionBegin; 2380 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2381 PetscValidType(B,1); 2382 PetscValidLogicalCollectiveInt(B,bs,2); 2383 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 2384 PetscFunctionReturn(0); 2385 } 2386 2387 /*@C 2388 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2389 (block compressed row). For good matrix assembly performance 2390 the user should preallocate the matrix storage by setting the parameters 2391 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2392 performance can be increased by more than a factor of 50. 2393 2394 Collective on MPI_Comm 2395 2396 Input Parameters: 2397 + comm - MPI communicator 2398 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2399 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2400 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2401 This value should be the same as the local size used in creating the 2402 y vector for the matrix-vector product y = Ax. 2403 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2404 This value should be the same as the local size used in creating the 2405 x vector for the matrix-vector product y = Ax. 2406 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2407 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2408 . d_nz - number of block nonzeros per block row in diagonal portion of local 2409 submatrix (same for all local rows) 2410 . d_nnz - array containing the number of block nonzeros in the various block rows 2411 in the upper triangular portion of the in diagonal portion of the local 2412 (possibly different for each block block row) or NULL. 2413 If you plan to factor the matrix you must leave room for the diagonal entry and 2414 set its value even if it is zero. 2415 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2416 submatrix (same for all local rows). 2417 - o_nnz - array containing the number of nonzeros in the various block rows of the 2418 off-diagonal portion of the local submatrix (possibly different for 2419 each block row) or NULL. 2420 2421 Output Parameter: 2422 . A - the matrix 2423 2424 Options Database Keys: 2425 . -mat_no_unroll - uses code that does not unroll the loops in the 2426 block calculations (much slower) 2427 . -mat_block_size - size of the blocks to use 2428 . -mat_mpi - use the parallel matrix data structures even on one processor 2429 (defaults to using SeqBAIJ format on one processor) 2430 2431 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2432 MatXXXXSetPreallocation() paradgm instead of this routine directly. 2433 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2434 2435 Notes: 2436 The number of rows and columns must be divisible by blocksize. 2437 This matrix type does not support complex Hermitian operation. 2438 2439 The user MUST specify either the local or global matrix dimensions 2440 (possibly both). 2441 2442 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2443 than it must be used on all processors that share the object for that argument. 2444 2445 If the *_nnz parameter is given then the *_nz parameter is ignored 2446 2447 Storage Information: 2448 For a square global matrix we define each processor's diagonal portion 2449 to be its local rows and the corresponding columns (a square submatrix); 2450 each processor's off-diagonal portion encompasses the remainder of the 2451 local matrix (a rectangular submatrix). 2452 2453 The user can specify preallocated storage for the diagonal part of 2454 the local submatrix with either d_nz or d_nnz (not both). Set 2455 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2456 memory allocation. Likewise, specify preallocated storage for the 2457 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2458 2459 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2460 the figure below we depict these three local rows and all columns (0-11). 2461 2462 .vb 2463 0 1 2 3 4 5 6 7 8 9 10 11 2464 -------------------------- 2465 row 3 |. . . d d d o o o o o o 2466 row 4 |. . . d d d o o o o o o 2467 row 5 |. . . d d d o o o o o o 2468 -------------------------- 2469 .ve 2470 2471 Thus, any entries in the d locations are stored in the d (diagonal) 2472 submatrix, and any entries in the o locations are stored in the 2473 o (off-diagonal) submatrix. Note that the d matrix is stored in 2474 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2475 2476 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2477 plus the diagonal part of the d matrix, 2478 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2479 In general, for PDE problems in which most nonzeros are near the diagonal, 2480 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2481 or you will get TERRIBLE performance; see the users' manual chapter on 2482 matrices. 2483 2484 Level: intermediate 2485 2486 .keywords: matrix, block, aij, compressed row, sparse, parallel 2487 2488 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2489 @*/ 2490 2491 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2492 { 2493 PetscErrorCode ierr; 2494 PetscMPIInt size; 2495 2496 PetscFunctionBegin; 2497 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2498 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2499 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2500 if (size > 1) { 2501 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2502 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2503 } else { 2504 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2505 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2506 } 2507 PetscFunctionReturn(0); 2508 } 2509 2510 2511 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2512 { 2513 Mat mat; 2514 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2515 PetscErrorCode ierr; 2516 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2517 PetscScalar *array; 2518 2519 PetscFunctionBegin; 2520 *newmat = 0; 2521 2522 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2523 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2524 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2525 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2526 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2527 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2528 2529 mat->factortype = matin->factortype; 2530 mat->preallocated = PETSC_TRUE; 2531 mat->assembled = PETSC_TRUE; 2532 mat->insertmode = NOT_SET_VALUES; 2533 2534 a = (Mat_MPISBAIJ*)mat->data; 2535 a->bs2 = oldmat->bs2; 2536 a->mbs = oldmat->mbs; 2537 a->nbs = oldmat->nbs; 2538 a->Mbs = oldmat->Mbs; 2539 a->Nbs = oldmat->Nbs; 2540 2541 2542 a->size = oldmat->size; 2543 a->rank = oldmat->rank; 2544 a->donotstash = oldmat->donotstash; 2545 a->roworiented = oldmat->roworiented; 2546 a->rowindices = 0; 2547 a->rowvalues = 0; 2548 a->getrowactive = PETSC_FALSE; 2549 a->barray = 0; 2550 a->rstartbs = oldmat->rstartbs; 2551 a->rendbs = oldmat->rendbs; 2552 a->cstartbs = oldmat->cstartbs; 2553 a->cendbs = oldmat->cendbs; 2554 2555 /* hash table stuff */ 2556 a->ht = 0; 2557 a->hd = 0; 2558 a->ht_size = 0; 2559 a->ht_flag = oldmat->ht_flag; 2560 a->ht_fact = oldmat->ht_fact; 2561 a->ht_total_ct = 0; 2562 a->ht_insert_ct = 0; 2563 2564 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2565 if (oldmat->colmap) { 2566 #if defined(PETSC_USE_CTABLE) 2567 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2568 #else 2569 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 2570 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2571 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2572 #endif 2573 } else a->colmap = 0; 2574 2575 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2576 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2577 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2578 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2579 } else a->garray = 0; 2580 2581 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2582 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2583 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2584 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2585 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2586 2587 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2588 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2589 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2590 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2591 2592 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2593 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2594 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2595 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2596 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2597 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2598 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2599 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2600 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2601 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2602 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 2603 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 2604 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2605 2606 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2607 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2608 a->sMvctx = oldmat->sMvctx; 2609 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 2610 2611 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2612 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2613 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2614 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2615 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2616 *newmat = mat; 2617 PetscFunctionReturn(0); 2618 } 2619 2620 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2621 { 2622 PetscErrorCode ierr; 2623 PetscInt i,nz,j,rstart,rend; 2624 PetscScalar *vals,*buf; 2625 MPI_Comm comm; 2626 MPI_Status status; 2627 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2628 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2629 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2630 PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows; 2631 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2632 PetscInt dcount,kmax,k,nzcount,tmp; 2633 int fd; 2634 2635 PetscFunctionBegin; 2636 /* force binary viewer to load .info file if it has not yet done so */ 2637 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2638 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2639 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2640 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2641 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2642 if (bs < 0) bs = 1; 2643 2644 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2645 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2646 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2647 if (!rank) { 2648 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2649 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2650 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2651 } 2652 2653 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2654 M = header[1]; 2655 N = header[2]; 2656 2657 /* If global sizes are set, check if they are consistent with that given in the file */ 2658 if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M); 2659 if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N); 2660 2661 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2662 2663 /* 2664 This code adds extra rows to make sure the number of rows is 2665 divisible by the blocksize 2666 */ 2667 Mbs = M/bs; 2668 extra_rows = bs - M + bs*(Mbs); 2669 if (extra_rows == bs) extra_rows = 0; 2670 else Mbs++; 2671 if (extra_rows &&!rank) { 2672 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2673 } 2674 2675 /* determine ownership of all rows */ 2676 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2677 mbs = Mbs/size + ((Mbs % size) > rank); 2678 m = mbs*bs; 2679 } else { /* User Set */ 2680 m = newmat->rmap->n; 2681 mbs = m/bs; 2682 } 2683 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 2684 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2685 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2686 rowners[0] = 0; 2687 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2688 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2689 rstart = rowners[rank]; 2690 rend = rowners[rank+1]; 2691 2692 /* distribute row lengths to all processors */ 2693 ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 2694 if (!rank) { 2695 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 2696 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2697 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2698 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 2699 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2700 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2701 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2702 } else { 2703 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2704 } 2705 2706 if (!rank) { /* procs[0] */ 2707 /* calculate the number of nonzeros on each processor */ 2708 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 2709 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2710 for (i=0; i<size; i++) { 2711 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2712 procsnz[i] += rowlengths[j]; 2713 } 2714 } 2715 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2716 2717 /* determine max buffer needed and allocate it */ 2718 maxnz = 0; 2719 for (i=0; i<size; i++) { 2720 maxnz = PetscMax(maxnz,procsnz[i]); 2721 } 2722 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2723 2724 /* read in my part of the matrix column indices */ 2725 nz = procsnz[0]; 2726 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2727 mycols = ibuf; 2728 if (size == 1) nz -= extra_rows; 2729 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2730 if (size == 1) { 2731 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 2732 } 2733 2734 /* read in every ones (except the last) and ship off */ 2735 for (i=1; i<size-1; i++) { 2736 nz = procsnz[i]; 2737 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2738 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2739 } 2740 /* read in the stuff for the last proc */ 2741 if (size != 1) { 2742 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2743 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2744 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2745 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2746 } 2747 ierr = PetscFree(cols);CHKERRQ(ierr); 2748 } else { /* procs[i], i>0 */ 2749 /* determine buffer space needed for message */ 2750 nz = 0; 2751 for (i=0; i<m; i++) nz += locrowlens[i]; 2752 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2753 mycols = ibuf; 2754 /* receive message of column indices*/ 2755 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2756 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2757 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2758 } 2759 2760 /* loop over local rows, determining number of off diagonal entries */ 2761 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 2762 ierr = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 2763 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2764 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2765 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2766 rowcount = 0; 2767 nzcount = 0; 2768 for (i=0; i<mbs; i++) { 2769 dcount = 0; 2770 odcount = 0; 2771 for (j=0; j<bs; j++) { 2772 kmax = locrowlens[rowcount]; 2773 for (k=0; k<kmax; k++) { 2774 tmp = mycols[nzcount++]/bs; /* block col. index */ 2775 if (!mask[tmp]) { 2776 mask[tmp] = 1; 2777 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2778 else masked1[dcount++] = tmp; /* entry in diag portion */ 2779 } 2780 } 2781 rowcount++; 2782 } 2783 2784 dlens[i] = dcount; /* d_nzz[i] */ 2785 odlens[i] = odcount; /* o_nzz[i] */ 2786 2787 /* zero out the mask elements we set */ 2788 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2789 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2790 } 2791 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2792 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2793 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2794 2795 if (!rank) { 2796 ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 2797 /* read in my part of the matrix numerical values */ 2798 nz = procsnz[0]; 2799 vals = buf; 2800 mycols = ibuf; 2801 if (size == 1) nz -= extra_rows; 2802 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2803 if (size == 1) { 2804 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 2805 } 2806 2807 /* insert into matrix */ 2808 jj = rstart*bs; 2809 for (i=0; i<m; i++) { 2810 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2811 mycols += locrowlens[i]; 2812 vals += locrowlens[i]; 2813 jj++; 2814 } 2815 2816 /* read in other processors (except the last one) and ship out */ 2817 for (i=1; i<size-1; i++) { 2818 nz = procsnz[i]; 2819 vals = buf; 2820 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2821 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2822 } 2823 /* the last proc */ 2824 if (size != 1) { 2825 nz = procsnz[i] - extra_rows; 2826 vals = buf; 2827 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2828 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2829 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2830 } 2831 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2832 2833 } else { 2834 /* receive numeric values */ 2835 ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 2836 2837 /* receive message of values*/ 2838 vals = buf; 2839 mycols = ibuf; 2840 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2841 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2842 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2843 2844 /* insert into matrix */ 2845 jj = rstart*bs; 2846 for (i=0; i<m; i++) { 2847 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2848 mycols += locrowlens[i]; 2849 vals += locrowlens[i]; 2850 jj++; 2851 } 2852 } 2853 2854 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2855 ierr = PetscFree(buf);CHKERRQ(ierr); 2856 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2857 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2858 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2859 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2860 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2861 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2862 PetscFunctionReturn(0); 2863 } 2864 2865 /*XXXXX@ 2866 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2867 2868 Input Parameters: 2869 . mat - the matrix 2870 . fact - factor 2871 2872 Not Collective on Mat, each process can have a different hash factor 2873 2874 Level: advanced 2875 2876 Notes: 2877 This can also be set by the command line option: -mat_use_hash_table fact 2878 2879 .keywords: matrix, hashtable, factor, HT 2880 2881 .seealso: MatSetOption() 2882 @XXXXX*/ 2883 2884 2885 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2886 { 2887 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2888 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2889 PetscReal atmp; 2890 PetscReal *work,*svalues,*rvalues; 2891 PetscErrorCode ierr; 2892 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2893 PetscMPIInt rank,size; 2894 PetscInt *rowners_bs,dest,count,source; 2895 PetscScalar *va; 2896 MatScalar *ba; 2897 MPI_Status stat; 2898 2899 PetscFunctionBegin; 2900 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2901 ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 2902 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2903 2904 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2905 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2906 2907 bs = A->rmap->bs; 2908 mbs = a->mbs; 2909 Mbs = a->Mbs; 2910 ba = b->a; 2911 bi = b->i; 2912 bj = b->j; 2913 2914 /* find ownerships */ 2915 rowners_bs = A->rmap->range; 2916 2917 /* each proc creates an array to be distributed */ 2918 ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr); 2919 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2920 2921 /* row_max for B */ 2922 if (rank != size-1) { 2923 for (i=0; i<mbs; i++) { 2924 ncols = bi[1] - bi[0]; bi++; 2925 brow = bs*i; 2926 for (j=0; j<ncols; j++) { 2927 bcol = bs*(*bj); 2928 for (kcol=0; kcol<bs; kcol++) { 2929 col = bcol + kcol; /* local col index */ 2930 col += rowners_bs[rank+1]; /* global col index */ 2931 for (krow=0; krow<bs; krow++) { 2932 atmp = PetscAbsScalar(*ba); ba++; 2933 row = brow + krow; /* local row index */ 2934 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2935 if (work[col] < atmp) work[col] = atmp; 2936 } 2937 } 2938 bj++; 2939 } 2940 } 2941 2942 /* send values to its owners */ 2943 for (dest=rank+1; dest<size; dest++) { 2944 svalues = work + rowners_bs[dest]; 2945 count = rowners_bs[dest+1]-rowners_bs[dest]; 2946 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2947 } 2948 } 2949 2950 /* receive values */ 2951 if (rank) { 2952 rvalues = work; 2953 count = rowners_bs[rank+1]-rowners_bs[rank]; 2954 for (source=0; source<rank; source++) { 2955 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 2956 /* process values */ 2957 for (i=0; i<count; i++) { 2958 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2959 } 2960 } 2961 } 2962 2963 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2964 ierr = PetscFree(work);CHKERRQ(ierr); 2965 PetscFunctionReturn(0); 2966 } 2967 2968 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2969 { 2970 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2971 PetscErrorCode ierr; 2972 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2973 PetscScalar *x,*ptr,*from; 2974 Vec bb1; 2975 const PetscScalar *b; 2976 2977 PetscFunctionBegin; 2978 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2979 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2980 2981 if (flag == SOR_APPLY_UPPER) { 2982 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2983 PetscFunctionReturn(0); 2984 } 2985 2986 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2987 if (flag & SOR_ZERO_INITIAL_GUESS) { 2988 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2989 its--; 2990 } 2991 2992 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2993 while (its--) { 2994 2995 /* lower triangular part: slvec0b = - B^T*xx */ 2996 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2997 2998 /* copy xx into slvec0a */ 2999 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 3000 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3001 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 3002 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 3003 3004 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 3005 3006 /* copy bb into slvec1a */ 3007 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 3008 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3009 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 3010 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 3011 3012 /* set slvec1b = 0 */ 3013 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 3014 3015 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3016 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3017 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3018 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3019 3020 /* upper triangular part: bb1 = bb1 - B*x */ 3021 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 3022 3023 /* local diagonal sweep */ 3024 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 3025 } 3026 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 3027 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 3028 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3029 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 3030 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3031 } else if (flag & SOR_EISENSTAT) { 3032 Vec xx1; 3033 PetscBool hasop; 3034 const PetscScalar *diag; 3035 PetscScalar *sl,scale = (omega - 2.0)/omega; 3036 PetscInt i,n; 3037 3038 if (!mat->xx1) { 3039 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 3040 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 3041 } 3042 xx1 = mat->xx1; 3043 bb1 = mat->bb1; 3044 3045 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 3046 3047 if (!mat->diag) { 3048 /* this is wrong for same matrix with new nonzero values */ 3049 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 3050 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 3051 } 3052 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 3053 3054 if (hasop) { 3055 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 3056 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 3057 } else { 3058 /* 3059 These two lines are replaced by code that may be a bit faster for a good compiler 3060 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 3061 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 3062 */ 3063 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 3064 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 3065 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3066 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3067 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 3068 if (omega == 1.0) { 3069 for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 3070 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 3071 } else { 3072 for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 3073 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 3074 } 3075 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 3076 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 3077 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3078 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3079 } 3080 3081 /* multiply off-diagonal portion of matrix */ 3082 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 3083 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 3084 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 3085 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3086 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 3087 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 3088 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3089 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3090 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3091 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 3092 3093 /* local sweep */ 3094 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 3095 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 3096 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 3097 PetscFunctionReturn(0); 3098 } 3099 3100 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 3101 { 3102 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 3103 PetscErrorCode ierr; 3104 Vec lvec1,bb1; 3105 3106 PetscFunctionBegin; 3107 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 3108 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 3109 3110 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 3111 if (flag & SOR_ZERO_INITIAL_GUESS) { 3112 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 3113 its--; 3114 } 3115 3116 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 3117 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 3118 while (its--) { 3119 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3120 3121 /* lower diagonal part: bb1 = bb - B^T*xx */ 3122 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 3123 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 3124 3125 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3126 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 3127 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3128 3129 /* upper diagonal part: bb1 = bb1 - B*x */ 3130 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 3131 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 3132 3133 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3134 3135 /* diagonal sweep */ 3136 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 3137 } 3138 ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 3139 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 3140 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 3141 PetscFunctionReturn(0); 3142 } 3143 3144 /*@ 3145 MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard 3146 CSR format the local rows. 3147 3148 Collective on MPI_Comm 3149 3150 Input Parameters: 3151 + comm - MPI communicator 3152 . bs - the block size, only a block size of 1 is supported 3153 . m - number of local rows (Cannot be PETSC_DECIDE) 3154 . n - This value should be the same as the local size used in creating the 3155 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3156 calculated if N is given) For square matrices n is almost always m. 3157 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3158 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3159 . i - row indices 3160 . j - column indices 3161 - a - matrix values 3162 3163 Output Parameter: 3164 . mat - the matrix 3165 3166 Level: intermediate 3167 3168 Notes: 3169 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3170 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3171 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3172 3173 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3174 3175 .keywords: matrix, aij, compressed row, sparse, parallel 3176 3177 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3178 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3179 @*/ 3180 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3181 { 3182 PetscErrorCode ierr; 3183 3184 3185 PetscFunctionBegin; 3186 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3187 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3188 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3189 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3190 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 3191 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 3192 PetscFunctionReturn(0); 3193 } 3194 3195 3196 /*@C 3197 MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 3198 (the default parallel PETSc format). 3199 3200 Collective on MPI_Comm 3201 3202 Input Parameters: 3203 + B - the matrix 3204 . bs - the block size 3205 . i - the indices into j for the start of each local row (starts with zero) 3206 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3207 - v - optional values in the matrix 3208 3209 Level: developer 3210 3211 .keywords: matrix, aij, compressed row, sparse, parallel 3212 3213 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 3214 @*/ 3215 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3216 { 3217 PetscErrorCode ierr; 3218 3219 PetscFunctionBegin; 3220 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3221 PetscFunctionReturn(0); 3222 } 3223 3224 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3225 { 3226 PetscErrorCode ierr; 3227 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 3228 PetscInt *indx; 3229 PetscScalar *values; 3230 3231 PetscFunctionBegin; 3232 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3233 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3234 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data; 3235 PetscInt *dnz,*onz,sum,bs,cbs,mbs,Nbs; 3236 PetscInt *bindx,rmax=a->rmax,j; 3237 3238 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3239 mbs = m/bs; Nbs = N/cbs; 3240 if (n == PETSC_DECIDE) { 3241 ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr); 3242 } 3243 /* Check sum(n) = Nbs */ 3244 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3245 if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs); 3246 3247 ierr = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3248 rstart -= mbs; 3249 3250 ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 3251 ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr); 3252 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 3253 for (i=0; i<mbs; i++) { 3254 ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 3255 nnz = nnz/bs; 3256 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 3257 ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 3258 ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 3259 } 3260 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 3261 ierr = PetscFree(bindx);CHKERRQ(ierr); 3262 3263 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3264 ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3265 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3266 ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr); 3267 ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 3268 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3269 } 3270 3271 /* numeric phase */ 3272 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3273 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3274 3275 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 3276 for (i=0; i<m; i++) { 3277 ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3278 Ii = i + rstart; 3279 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3280 ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3281 } 3282 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 3283 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3284 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3285 PetscFunctionReturn(0); 3286 } 3287