#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif

/* Save the current numerical values of the diagonal (A) and off-diagonal (B)
   local blocks so they can later be restored with MatRetrieveValues_MPISBAIJ(). */
PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Restore the numerical values of both local blocks previously saved by
   MatStoreValues_MPISBAIJ(). */
PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Insert or add the scalar `value` at local point indices (row,col) into the
  diagonal block A of the MPISBAIJ matrix.  (orow,ocol) are the original global
  indices, used only in the error message.

  This is a macro, not a function: it relies on locals declared by the caller
  (brow,bcol,rp,ap,rmax,nrow,ridx,cidx,low,high,t,_i,ii,N,bap,ierr and the
  cached arrays aa,ai,aj,aimax,ailen of A).  It does a truncated binary search
  followed by a linear scan within the block row; on a miss it may reallocate
  (via MatSeqXAIJReallocateAIJ) and shift later entries up to make room,
  honoring a->nonew (1 = silently drop new nonzeros, -1 = error on new nonzero).
*/
#define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    A->nonzerostate++;\
a_noinsert:; \
    ailen[brow] = nrow; \
  }

/*
  Identical to MatSetValues_SeqSBAIJ_A_Private() but targets the off-diagonal
  block B (arrays ba,bi,bj,bimax,bilen; flag b->nonew; label b_noinsert).
*/
#define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    B->nonzerostate++;\
b_noinsert:; \
    bilen[brow] = nrow; \
  }

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by user is ignored.
 */
/* MatSetValues for MATMPISBAIJ, scalar (point-index) version.

   Local rows go into the diagonal block A (upper-triangular blocks only) or
   the off-diagonal block B via the _A_Private/_B_Private macros; off-process
   rows are stashed for communication during assembly.  Entries in lower
   triangular blocks are either silently dropped (when ignore_ltriangular is
   set) or raise an error. */
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij        = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented  = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* grow the cached scratch arrays used to filter a stashed row */
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;          /* negative rows are silently ignored */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else { /* off-diag entry (B) */
          if (mat->was_assembled) {
            /* map the global column to B's compacted local numbering */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              /* new off-diagonal block column: expand B to full width */
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ*)(B)->data;
              bimax = b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba    = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else { /* off processor entry */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        /* filter out lower-triangular entries before stashing the row */
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

/* Insert/add one bs x bs block of values at block indices (row,col) into a
   sequential SBAIJ matrix.  v is bs2 contiguous values; (orow,ocol) are the
   original global block indices for error reporting.  Lower triangular blocks
   (col < row) are dropped or rejected depending on a->ignore_ltriangular. */
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp    = aj + ai[row];
  ap    = aa + bs2*ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;

  /* truncated binary search for the block column, then linear scan */
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already present: add/insert in place; stored blocks are
         column-major, so row-oriented input is transposed on the fly */
      bap = ap + bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ = *value++;
            }
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}

/*
   This routine is exactly duplicated in mpibaij.c
*/
/* Insert/add one bs x bs block at block indices (row,col) into a sequential
   BAIJ matrix (no symmetry filtering); otherwise analogous to
   MatSetValuesBlocked_SeqSBAIJ_Inlined() above. */
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2*ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  low   = 0;
  high  = nrow;
  value = v;
  /* truncated binary search for the block column, then linear scan */
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
rp[ii+1] = rp[ii]; 401 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 402 } 403 if (N >= i) { 404 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 405 } 406 rp[i] = col; 407 bap = ap + bs2*i; 408 if (roworiented) { 409 for (ii=0; ii<bs; ii++) { 410 for (jj=ii; jj<bs2; jj+=bs) { 411 bap[jj] = *value++; 412 } 413 } 414 } else { 415 for (ii=0; ii<bs; ii++) { 416 for (jj=0; jj<bs; jj++) { 417 *bap++ = *value++; 418 } 419 } 420 } 421 noinsert2:; 422 ailen[row] = nrow; 423 PetscFunctionReturn(0); 424 } 425 426 /* 427 This routine could be optimized by removing the need for the block copy below and passing stride information 428 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 429 */ 430 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 431 { 432 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 433 const MatScalar *value; 434 MatScalar *barray =baij->barray; 435 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 436 PetscErrorCode ierr; 437 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 438 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 439 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 440 441 PetscFunctionBegin; 442 if (!barray) { 443 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 444 baij->barray = barray; 445 } 446 447 if (roworiented) { 448 stepval = (n-1)*bs; 449 } else { 450 stepval = (m-1)*bs; 451 } 452 for (i=0; i<m; i++) { 453 if (im[i] < 0) continue; 454 #if defined(PETSC_USE_DEBUG) 455 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1); 456 #endif 457 if (im[i] >= rstart && im[i] < rend) { 458 row = im[i] - rstart; 459 for (j=0; j<n; j++) { 460 if (im[i] > in[j]) { 461 if (ignore_ltriangular) continue; /* ignore 
lower triangular blocks */ 462 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 463 } 464 /* If NumCol = 1 then a copy is not required */ 465 if ((roworiented) && (n == 1)) { 466 barray = (MatScalar*) v + i*bs2; 467 } else if ((!roworiented) && (m == 1)) { 468 barray = (MatScalar*) v + j*bs2; 469 } else { /* Here a copy is required */ 470 if (roworiented) { 471 value = v + i*(stepval+bs)*bs + j*bs; 472 } else { 473 value = v + j*(stepval+bs)*bs + i*bs; 474 } 475 for (ii=0; ii<bs; ii++,value+=stepval) { 476 for (jj=0; jj<bs; jj++) { 477 *barray++ = *value++; 478 } 479 } 480 barray -=bs2; 481 } 482 483 if (in[j] >= cstart && in[j] < cend) { 484 col = in[j] - cstart; 485 ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 486 } else if (in[j] < 0) continue; 487 #if defined(PETSC_USE_DEBUG) 488 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1); 489 #endif 490 else { 491 if (mat->was_assembled) { 492 if (!baij->colmap) { 493 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 494 } 495 496 #if defined(PETSC_USE_DEBUG) 497 #if defined(PETSC_USE_CTABLE) 498 { PetscInt data; 499 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 500 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 501 } 502 #else 503 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 504 #endif 505 #endif 506 #if defined(PETSC_USE_CTABLE) 507 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 508 col = (col - 1)/bs; 509 #else 510 col = (baij->colmap[in[j]] - 1)/bs; 511 #endif 512 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 513 ierr = 
MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 514 col = in[j]; 515 } 516 } else col = in[j]; 517 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 518 } 519 } 520 } else { 521 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 522 if (!baij->donotstash) { 523 if (roworiented) { 524 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 525 } else { 526 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 527 } 528 } 529 } 530 } 531 PetscFunctionReturn(0); 532 } 533 534 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 535 { 536 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 537 PetscErrorCode ierr; 538 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 539 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 540 541 PetscFunctionBegin; 542 for (i=0; i<m; i++) { 543 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 544 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 545 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 546 row = idxm[i] - bsrstart; 547 for (j=0; j<n; j++) { 548 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 549 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 550 if (idxn[j] >= bscstart && idxn[j] < bscend) { 551 col = idxn[j] - bscstart; 552 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 553 } else { 554 if (!baij->colmap) { 555 ierr = 
MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 556 } 557 #if defined(PETSC_USE_CTABLE) 558 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 559 data--; 560 #else 561 data = baij->colmap[idxn[j]/bs]-1; 562 #endif 563 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 564 else { 565 col = data + idxn[j]%bs; 566 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 567 } 568 } 569 } 570 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 571 } 572 PetscFunctionReturn(0); 573 } 574 575 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 576 { 577 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 578 PetscErrorCode ierr; 579 PetscReal sum[2],*lnorm2; 580 581 PetscFunctionBegin; 582 if (baij->size == 1) { 583 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 584 } else { 585 if (type == NORM_FROBENIUS) { 586 ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr); 587 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 588 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 589 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 590 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 591 ierr = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 592 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 593 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 594 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 595 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 596 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 597 PetscReal *rsum,*rsum2,vabs; 598 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 599 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 600 MatScalar *v; 601 602 ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr); 603 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 604 /* Amat */ 605 v = 
amat->a; jj = amat->j; 606 for (brow=0; brow<mbs; brow++) { 607 grow = bs*(rstart + brow); 608 nz = amat->i[brow+1] - amat->i[brow]; 609 for (bcol=0; bcol<nz; bcol++) { 610 gcol = bs*(rstart + *jj); jj++; 611 for (col=0; col<bs; col++) { 612 for (row=0; row<bs; row++) { 613 vabs = PetscAbsScalar(*v); v++; 614 rsum[gcol+col] += vabs; 615 /* non-diagonal block */ 616 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 617 } 618 } 619 } 620 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 621 } 622 /* Bmat */ 623 v = bmat->a; jj = bmat->j; 624 for (brow=0; brow<mbs; brow++) { 625 grow = bs*(rstart + brow); 626 nz = bmat->i[brow+1] - bmat->i[brow]; 627 for (bcol=0; bcol<nz; bcol++) { 628 gcol = bs*garray[*jj]; jj++; 629 for (col=0; col<bs; col++) { 630 for (row=0; row<bs; row++) { 631 vabs = PetscAbsScalar(*v); v++; 632 rsum[gcol+col] += vabs; 633 rsum[grow+row] += vabs; 634 } 635 } 636 } 637 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 638 } 639 ierr = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 640 *norm = 0.0; 641 for (col=0; col<mat->cmap->N; col++) { 642 if (rsum2[col] > *norm) *norm = rsum2[col]; 643 } 644 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 645 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 646 } 647 PetscFunctionReturn(0); 648 } 649 650 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 651 { 652 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 653 PetscErrorCode ierr; 654 PetscInt nstash,reallocs; 655 656 PetscFunctionBegin; 657 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 658 659 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 660 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 661 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 662 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D 
mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 663 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 664 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 665 PetscFunctionReturn(0); 666 } 667 668 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 669 { 670 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 671 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data; 672 PetscErrorCode ierr; 673 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 674 PetscInt *row,*col; 675 PetscBool other_disassembled; 676 PetscMPIInt n; 677 PetscBool r1,r2,r3; 678 MatScalar *val; 679 680 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 681 PetscFunctionBegin; 682 if (!baij->donotstash && !mat->nooffprocentries) { 683 while (1) { 684 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 685 if (!flg) break; 686 687 for (i=0; i<n;) { 688 /* Now identify the consecutive vals belonging to the same row */ 689 for (j=i,rstart=row[j]; j<n; j++) { 690 if (row[j] != rstart) break; 691 } 692 if (j < n) ncols = j-i; 693 else ncols = n-i; 694 /* Now assemble all these values with a single function call */ 695 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 696 i = j; 697 } 698 } 699 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 700 /* Now process the block-stash. 
       Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    /* MPI_PROD of was_assembled: result is false iff some rank disassembled */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  /* drop any cached row workspace from MatGetRow-type operations */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);

#include <petscdraw.h>
/* View a MATMPISBAIJ matrix on ASCII, draw, or socket viewers.  Info formats
   are printed per rank; otherwise the whole matrix is gathered onto rank 0
   and viewed there (all ranks participate for graphics synchronization). */
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank storage statistics, printed synchronized in rank order */
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      /* rank 0 owns the whole gathered matrix; other ranks own nothing */
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      /* rvals holds the bs global point-row indices of block row i */
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* insert one column of the bs x bs block at a time */
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray maps B's compacted block columns back to global indices */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
synchronized across all processors that share the PetscDraw object 876 */ 877 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 878 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 879 if (!rank) { 880 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 881 ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 882 } 883 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 884 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 885 ierr = MatDestroy(&A);CHKERRQ(ierr); 886 } 887 PetscFunctionReturn(0); 888 } 889 890 static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer) 891 { 892 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)mat->data; 893 Mat_SeqSBAIJ *A = (Mat_SeqSBAIJ*)a->A->data; 894 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data; 895 PetscErrorCode ierr; 896 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 897 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 898 int fd; 899 PetscScalar *column_values; 900 FILE *file; 901 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 902 PetscInt message_count,flowcontrolcount; 903 904 PetscFunctionBegin; 905 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 906 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 907 nz = bs2*(A->nz + B->nz); 908 rlen = mat->rmap->n; 909 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 910 if (!rank) { 911 header[0] = MAT_FILE_CLASSID; 912 header[1] = mat->rmap->N; 913 header[2] = mat->cmap->N; 914 915 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 916 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 917 /* get largest number of rows any processor has */ 918 range = mat->rmap->range; 919 for (i=1; i<size; 
i++) { 920 rlen = PetscMax(rlen,range[i+1] - range[i]); 921 } 922 } else { 923 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 924 } 925 926 ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr); 927 /* compute lengths of each row */ 928 for (i=0; i<a->mbs; i++) { 929 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 930 } 931 /* store the row lengths to the file */ 932 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 933 if (!rank) { 934 MPI_Status status; 935 ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr); 936 rlen = (range[1] - range[0])/bs; 937 for (i=0; i<rlen; i++) { 938 for (j=0; j<bs; j++) { 939 row_lens[i*bs+j] = bs*crow_lens[i]; 940 } 941 } 942 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 943 for (i=1; i<size; i++) { 944 rlen = (range[i+1] - range[i])/bs; 945 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 946 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 947 for (k=0; k<rlen; k++) { 948 for (j=0; j<bs; j++) { 949 row_lens[k*bs+j] = bs*crow_lens[k]; 950 } 951 } 952 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 953 } 954 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 955 ierr = PetscFree(row_lens);CHKERRQ(ierr); 956 } else { 957 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 958 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 959 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 960 } 961 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 962 963 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 964 information needed to make it for each row from a block row. 
This does require more communication but still not more than 965 the communication needed for the nonzero values */ 966 nzmax = nz; /* space a largest processor needs */ 967 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 968 ierr = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr); 969 cnt = 0; 970 for (i=0; i<a->mbs; i++) { 971 pcnt = cnt; 972 for (j=B->i[i]; j<B->i[i+1]; j++) { 973 if ((col = garray[B->j[j]]) > cstart) break; 974 for (l=0; l<bs; l++) { 975 column_indices[cnt++] = bs*col+l; 976 } 977 } 978 for (k=A->i[i]; k<A->i[i+1]; k++) { 979 for (l=0; l<bs; l++) { 980 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 981 } 982 } 983 for (; j<B->i[i+1]; j++) { 984 for (l=0; l<bs; l++) { 985 column_indices[cnt++] = bs*garray[B->j[j]]+l; 986 } 987 } 988 len = cnt - pcnt; 989 for (k=1; k<bs; k++) { 990 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 991 cnt += len; 992 } 993 } 994 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 995 996 /* store the columns to the file */ 997 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 998 if (!rank) { 999 MPI_Status status; 1000 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1001 for (i=1; i<size; i++) { 1002 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1003 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1004 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1005 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1006 } 1007 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1008 } else { 1009 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1010 
ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1011 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1012 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1013 } 1014 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1015 1016 /* load up the numerical values */ 1017 ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr); 1018 cnt = 0; 1019 for (i=0; i<a->mbs; i++) { 1020 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1021 for (j=B->i[i]; j<B->i[i+1]; j++) { 1022 if (garray[B->j[j]] > cstart) break; 1023 for (l=0; l<bs; l++) { 1024 for (ll=0; ll<bs; ll++) { 1025 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1026 } 1027 } 1028 cnt += bs; 1029 } 1030 for (k=A->i[i]; k<A->i[i+1]; k++) { 1031 for (l=0; l<bs; l++) { 1032 for (ll=0; ll<bs; ll++) { 1033 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1034 } 1035 } 1036 cnt += bs; 1037 } 1038 for (; j<B->i[i+1]; j++) { 1039 for (l=0; l<bs; l++) { 1040 for (ll=0; ll<bs; ll++) { 1041 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1042 } 1043 } 1044 cnt += bs; 1045 } 1046 cnt += (bs-1)*rlen; 1047 } 1048 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1049 1050 /* store the column values to the file */ 1051 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1052 if (!rank) { 1053 MPI_Status status; 1054 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1055 for (i=1; i<size; i++) { 1056 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1057 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1058 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1059 ierr = 
PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1060 } 1061 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1062 } else { 1063 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1064 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1065 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1066 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1067 } 1068 ierr = PetscFree(column_values);CHKERRQ(ierr); 1069 1070 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1071 if (file) { 1072 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1073 } 1074 PetscFunctionReturn(0); 1075 } 1076 1077 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 1078 { 1079 PetscErrorCode ierr; 1080 PetscBool iascii,isdraw,issocket,isbinary; 1081 1082 PetscFunctionBegin; 1083 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1084 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1085 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1086 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1087 if (iascii || isdraw || issocket) { 1088 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1089 } else if (isbinary) { 1090 ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1091 } 1092 PetscFunctionReturn(0); 1093 } 1094 1095 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 1096 { 1097 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1098 PetscErrorCode ierr; 1099 1100 PetscFunctionBegin; 1101 #if defined(PETSC_USE_LOG) 1102 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1103 #endif 1104 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  /* destroy the sequential diagonal (A) and off-diagonal (B) blocks */
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  /* work vectors and scatters used by the matrix-vector products */
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* reset the type name and remove all composed (type-specific) methods */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

/*
   y = A*x for a Hermitian MPISBAIJ matrix: only the upper triangle is stored,
   so the lower-triangle contribution is obtained from B's conjugate-transpose
   product, merged through the single scatter sMvctx.
*/
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  /* one combined scatter communicates both x and the subdiagonal products */
  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   y = A*x for an MPISBAIJ matrix.  Same structure as the Hermitian variant
   above but uses B's plain transpose for the subdiagonal contribution.
*/
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
1194 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1195 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1196 1197 /* subdiagonal part */ 1198 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1199 1200 /* copy x into the vec slvec0 */ 1201 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1202 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1203 1204 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1205 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1206 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1207 1208 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1209 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1210 /* supperdiagonal part */ 1211 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1212 PetscFunctionReturn(0); 1213 } 1214 1215 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 1216 { 1217 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1218 PetscErrorCode ierr; 1219 PetscInt nt; 1220 1221 PetscFunctionBegin; 1222 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1223 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1224 1225 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1226 if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1227 1228 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1229 /* do diagonal part */ 1230 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1231 /* do supperdiagonal part */ 1232 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1233 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1234 /* do subdiagonal part */ 1235 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1236 ierr = 
VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1237 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1238 PetscFunctionReturn(0); 1239 } 1240 1241 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1242 { 1243 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1244 PetscErrorCode ierr; 1245 PetscInt mbs=a->mbs,bs=A->rmap->bs; 1246 PetscScalar *from,zero=0.0; 1247 const PetscScalar *x; 1248 1249 PetscFunctionBegin; 1250 /* 1251 PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n"); 1252 PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT); 1253 */ 1254 /* diagonal part */ 1255 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1256 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 1257 1258 /* subdiagonal part */ 1259 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1260 1261 /* copy x into the vec slvec0 */ 1262 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1263 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1264 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1265 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1266 1267 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1268 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1269 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1270 1271 /* supperdiagonal part */ 1272 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1273 PetscFunctionReturn(0); 1274 } 1275 1276 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1277 { 1278 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1279 PetscErrorCode ierr; 1280 1281 PetscFunctionBegin; 1282 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1283 /* do diagonal part */ 1284 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1285 
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Scales the matrix by aa: both the diagonal and off-diagonal blocks */
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Returns one locally-owned point row, merging the entries of the diagonal
   block A and off-diagonal block B into global column order (B entries before
   cstart, then A, then remaining B entries).  Work arrays rowvalues/rowindices
   are allocated lazily on first use and sized for the longest block row.
*/
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  /* only request the pieces (values/indices) the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries whose global column precedes the diagonal block */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* imark not computed above (v was NULL); find it here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Companion of MatGetRow_MPISBAIJ; only clears the re-entrancy flag */
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Allows MatGetRow() to return only the stored (upper-triangular) entries */
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}
/* Restores the default MatGetRow() behavior after MatGetRowUpperTriangular() */
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Replaces each entry by its real part, in both local blocks */
PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Replaces each entry by its imaginary part, in both local blocks */
PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
   Input: isrow       - distributed(parallel),
          iscol_local - locally owned (seq)
*/
PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
{
  PetscErrorCode ierr;
  PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
  const PetscInt *ptr1,*ptr2;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
  if (sz1 > sz2) {
    *flg = PETSC_FALSE;
    PetscFunctionReturn(0);
  }

  ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);

  /* compare sorted copies so the original index sets are untouched */
  ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
  ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
  ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
  ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);

  /* count how many entries of a1 appear in a2 (both sorted, so scan forward) */
  nmatch=0;
  k     = 0;
  for (i=0; i<sz1; i++){
    for (j=k; j<sz2; j++){
      if (a1[i] == a2[j]) {
        k = j; nmatch++;
        break;
      }
    }
  }
  ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
  ierr = PetscFree(a1);CHKERRQ(ierr);
  ierr = PetscFree(a2);CHKERRQ(ierr);
  if (nmatch < sz1) {
    *flg = PETSC_FALSE;
  } else {
    *flg = PETSC_TRUE;
  }
  PetscFunctionReturn(0);
}

/*
   Extracts a parallel submatrix.  Symmetric storage requires isrow to be a
   subset of the gathered iscol; the gathered IS is cached on the new matrix
   (composed as "ISAllGather") so MAT_REUSE_MATRIX calls can recover it.
*/
PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;
  PetscBool      isequal;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
    if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
  }

  /* now call MatCreateSubMatrix_MPIBAIJ() */
  ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Zeros all entries while keeping the nonzero structure */
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Gathers matrix statistics: local counts are the sums over the A and B
   blocks; MAT_GLOBAL_MAX/MAT_GLOBAL_SUM reduce them over the communicator.
*/
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A  = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* Handles MatSetOption() for MPISBAIJ; most options are forwarded to the local blocks */
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    MatCheckPreallocated(A,1);
    if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    /* Hermitian multiply differs from the symmetric one only with complex scalars */
    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
#endif
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    /* NOTE(review): this sets the same flag as MAT_IGNORE_LOWER_TRIANGULAR;
       confirm whether a distinct error flag was intended */
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

/* Transpose of a symmetric matrix is itself: duplicate or copy accordingly */
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   diag(ll)*A*diag(rr): symmetric storage requires ll == rr (checked with
   VecEqual).  The off-diagonal block is scaled on the left locally and on
   the right with the scattered ghost values of rr.
*/
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a     = baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscBool      flg;

  PetscFunctionBegin;
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Marks the matrix as unfactored (only the diagonal block holds factor state) */
PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);

/* Compares block-wise locally, then takes the logical AND over all ranks */
PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
{
  Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Copies A into B; block-wise fast path when patterns and implementations match */
PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Default MatSetUp(): preallocate with default (heuristic) nonzero counts */
PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Y = Y + a*X.  Three paths: BLAS axpy on the raw value arrays when the
   patterns are identical; the generic basic kernel for a subset pattern; and
   a full re-preallocation + merge for unrelated patterns.
*/
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqSBAIJ   *xa,*ya;
  Mat_SeqBAIJ    *xb,*yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    xa   = (Mat_SeqSBAIJ*)xx->A->data;
    ya   = (Mat_SeqSBAIJ*)yy->A->data;
    ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
    xb   = (Mat_SeqBAIJ*)xx->B->data;
    yb   = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
    ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
    ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    /* build a fresh matrix preallocated for the union of both patterns */
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    /* B replaces Y's internals; Y keeps its identity for the caller */
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   Extracts sequential submatrices; submatrices whose row and column index
   sets differ are not symmetric, so their sbaij-specific operations are
   disabled.
*/
PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
{
  PetscErrorCode ierr;
  PetscInt       i;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */
  for (i=0; i<n; i++) {
    ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
    if (!flg) {
      ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

/* Y = Y + a*I; makes sure the diagonal is preallocated before shifting */
PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
  Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    /* preallocate one block per row for the diagonal, preserving nonew */
    PetscInt nonew = aij->nonew;
    ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
    aij->nonew = nonew;
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Reports the first missing diagonal entry (global block index) if any */
PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
  ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
  if (d) {
    PetscInt rstart;
    ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
    *d += rstart/A->rmap->bs; /* convert the local block index to global */

  }
  PetscFunctionReturn(0);
}

/* Returns (a borrowed reference to) the local diagonal block */
PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPISBAIJ*)A->data)->A;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
                                       MatGetRow_MPISBAIJ,
                                       MatRestoreRow_MPISBAIJ,
                                       MatMult_MPISBAIJ,
                               /* 4*/  MatMultAdd_MPISBAIJ,
                                       MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
                                       MatMultAdd_MPISBAIJ,
1892 0, 1893 0, 1894 0, 1895 /* 10*/ 0, 1896 0, 1897 0, 1898 MatSOR_MPISBAIJ, 1899 MatTranspose_MPISBAIJ, 1900 /* 15*/ MatGetInfo_MPISBAIJ, 1901 MatEqual_MPISBAIJ, 1902 MatGetDiagonal_MPISBAIJ, 1903 MatDiagonalScale_MPISBAIJ, 1904 MatNorm_MPISBAIJ, 1905 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1906 MatAssemblyEnd_MPISBAIJ, 1907 MatSetOption_MPISBAIJ, 1908 MatZeroEntries_MPISBAIJ, 1909 /* 24*/ 0, 1910 0, 1911 0, 1912 0, 1913 0, 1914 /* 29*/ MatSetUp_MPISBAIJ, 1915 0, 1916 0, 1917 MatGetDiagonalBlock_MPISBAIJ, 1918 0, 1919 /* 34*/ MatDuplicate_MPISBAIJ, 1920 0, 1921 0, 1922 0, 1923 0, 1924 /* 39*/ MatAXPY_MPISBAIJ, 1925 MatCreateSubMatrices_MPISBAIJ, 1926 MatIncreaseOverlap_MPISBAIJ, 1927 MatGetValues_MPISBAIJ, 1928 MatCopy_MPISBAIJ, 1929 /* 44*/ 0, 1930 MatScale_MPISBAIJ, 1931 MatShift_MPISBAIJ, 1932 0, 1933 0, 1934 /* 49*/ 0, 1935 0, 1936 0, 1937 0, 1938 0, 1939 /* 54*/ 0, 1940 0, 1941 MatSetUnfactored_MPISBAIJ, 1942 0, 1943 MatSetValuesBlocked_MPISBAIJ, 1944 /* 59*/ MatCreateSubMatrix_MPISBAIJ, 1945 0, 1946 0, 1947 0, 1948 0, 1949 /* 64*/ 0, 1950 0, 1951 0, 1952 0, 1953 0, 1954 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1955 0, 1956 0, 1957 0, 1958 0, 1959 /* 74*/ 0, 1960 0, 1961 0, 1962 0, 1963 0, 1964 /* 79*/ 0, 1965 0, 1966 0, 1967 0, 1968 MatLoad_MPISBAIJ, 1969 /* 84*/ 0, 1970 0, 1971 0, 1972 0, 1973 0, 1974 /* 89*/ 0, 1975 0, 1976 0, 1977 0, 1978 0, 1979 /* 94*/ 0, 1980 0, 1981 0, 1982 0, 1983 0, 1984 /* 99*/ 0, 1985 0, 1986 0, 1987 0, 1988 0, 1989 /*104*/ 0, 1990 MatRealPart_MPISBAIJ, 1991 MatImaginaryPart_MPISBAIJ, 1992 MatGetRowUpperTriangular_MPISBAIJ, 1993 MatRestoreRowUpperTriangular_MPISBAIJ, 1994 /*109*/ 0, 1995 0, 1996 0, 1997 0, 1998 MatMissingDiagonal_MPISBAIJ, 1999 /*114*/ 0, 2000 0, 2001 0, 2002 0, 2003 0, 2004 /*119*/ 0, 2005 0, 2006 0, 2007 0, 2008 0, 2009 /*124*/ 0, 2010 0, 2011 0, 2012 0, 2013 0, 2014 /*129*/ 0, 2015 0, 2016 0, 2017 0, 2018 0, 2019 /*134*/ 0, 2020 0, 2021 0, 2022 0, 2023 0, 2024 /*139*/ MatSetBlockSizes_Default, 2025 0, 2026 0, 2027 0, 2028 
0, 2029 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 2030 }; 2031 2032 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2033 { 2034 Mat_MPISBAIJ *b; 2035 PetscErrorCode ierr; 2036 PetscInt i,mbs,Mbs; 2037 2038 PetscFunctionBegin; 2039 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2040 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2041 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2042 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2043 2044 b = (Mat_MPISBAIJ*)B->data; 2045 mbs = B->rmap->n/bs; 2046 Mbs = B->rmap->N/bs; 2047 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 2048 2049 B->rmap->bs = bs; 2050 b->bs2 = bs*bs; 2051 b->mbs = mbs; 2052 b->Mbs = Mbs; 2053 b->nbs = B->cmap->n/bs; 2054 b->Nbs = B->cmap->N/bs; 2055 2056 for (i=0; i<=b->size; i++) { 2057 b->rangebs[i] = B->rmap->range[i]/bs; 2058 } 2059 b->rstartbs = B->rmap->rstart/bs; 2060 b->rendbs = B->rmap->rend/bs; 2061 2062 b->cstartbs = B->cmap->rstart/bs; 2063 b->cendbs = B->cmap->rend/bs; 2064 2065 #if defined(PETSC_USE_CTABLE) 2066 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2067 #else 2068 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2069 #endif 2070 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2071 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2072 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2073 ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr); 2074 ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr); 2075 ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr); 2076 ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr); 2077 ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr); 2078 ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr); 2079 2080 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2081 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2082 ierr = 
MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2083 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2084 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2085 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2086 2087 if (!B->preallocated) { 2088 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2089 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2090 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 2091 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2092 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2093 } 2094 2095 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2096 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2097 2098 B->preallocated = PETSC_TRUE; 2099 B->was_assembled = PETSC_FALSE; 2100 B->assembled = PETSC_FALSE; 2101 PetscFunctionReturn(0); 2102 } 2103 2104 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2105 { 2106 PetscInt m,rstart,cstart,cend; 2107 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2108 const PetscInt *JJ =0; 2109 PetscScalar *values=0; 2110 PetscErrorCode ierr; 2111 2112 PetscFunctionBegin; 2113 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2114 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2115 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2116 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2117 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2118 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2119 m = B->rmap->n/bs; 2120 rstart = B->rmap->rstart/bs; 2121 cstart = B->cmap->rstart/bs; 2122 cend = B->cmap->rend/bs; 2123 2124 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] 
must be 0 but it is %D",ii[0]); 2125 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2126 for (i=0; i<m; i++) { 2127 nz = ii[i+1] - ii[i]; 2128 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2129 nz_max = PetscMax(nz_max,nz); 2130 JJ = jj + ii[i]; 2131 for (j=0; j<nz; j++) { 2132 if (*JJ >= cstart) break; 2133 JJ++; 2134 } 2135 d = 0; 2136 for (; j<nz; j++) { 2137 if (*JJ++ >= cend) break; 2138 d++; 2139 } 2140 d_nnz[i] = d; 2141 o_nnz[i] = nz - d; 2142 } 2143 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2144 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2145 2146 values = (PetscScalar*)V; 2147 if (!values) { 2148 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2149 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2150 } 2151 for (i=0; i<m; i++) { 2152 PetscInt row = i + rstart; 2153 PetscInt ncols = ii[i+1] - ii[i]; 2154 const PetscInt *icols = jj + ii[i]; 2155 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2156 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2157 } 2158 2159 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2160 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2161 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2162 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2163 PetscFunctionReturn(0); 2164 } 2165 2166 /*MC 2167 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2168 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2169 the matrix is stored. 2170 2171 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2172 can call MatSetOption(Mat, MAT_HERMITIAN); 2173 2174 Options Database Keys: 2175 . 
-mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 2176 2177 Level: beginner 2178 2179 .seealso: MatCreateMPISBAIJ 2180 M*/ 2181 2182 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*); 2183 2184 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2185 { 2186 Mat_MPISBAIJ *b; 2187 PetscErrorCode ierr; 2188 PetscBool flg = PETSC_FALSE; 2189 2190 PetscFunctionBegin; 2191 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2192 B->data = (void*)b; 2193 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2194 2195 B->ops->destroy = MatDestroy_MPISBAIJ; 2196 B->ops->view = MatView_MPISBAIJ; 2197 B->assembled = PETSC_FALSE; 2198 B->insertmode = NOT_SET_VALUES; 2199 2200 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2201 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2202 2203 /* build local table of row and column ownerships */ 2204 ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 2205 2206 /* build cache for off array entries formed */ 2207 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2208 2209 b->donotstash = PETSC_FALSE; 2210 b->colmap = NULL; 2211 b->garray = NULL; 2212 b->roworiented = PETSC_TRUE; 2213 2214 /* stuff used in block assembly */ 2215 b->barray = 0; 2216 2217 /* stuff used for matrix vector multiply */ 2218 b->lvec = 0; 2219 b->Mvctx = 0; 2220 b->slvec0 = 0; 2221 b->slvec0b = 0; 2222 b->slvec1 = 0; 2223 b->slvec1a = 0; 2224 b->slvec1b = 0; 2225 b->sMvctx = 0; 2226 2227 /* stuff for MatGetRow() */ 2228 b->rowindices = 0; 2229 b->rowvalues = 0; 2230 b->getrowactive = PETSC_FALSE; 2231 2232 /* hash table stuff */ 2233 b->ht = 0; 2234 b->hd = 0; 2235 b->ht_size = 0; 2236 b->ht_flag = PETSC_FALSE; 2237 b->ht_fact = 0; 2238 b->ht_total_ct = 0; 2239 b->ht_insert_ct = 0; 2240 2241 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2242 b->ijonly = 
PETSC_FALSE; 2243 2244 b->in_loc = 0; 2245 b->v_loc = 0; 2246 b->n_loc = 0; 2247 2248 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 2249 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 2250 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 2251 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 2252 #if defined(PETSC_HAVE_ELEMENTAL) 2253 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 2254 #endif 2255 2256 B->symmetric = PETSC_TRUE; 2257 B->structurally_symmetric = PETSC_TRUE; 2258 B->symmetric_set = PETSC_TRUE; 2259 B->structurally_symmetric_set = PETSC_TRUE; 2260 B->symmetric_eternal = PETSC_TRUE; 2261 2262 B->hermitian = PETSC_FALSE; 2263 B->hermitian_set = PETSC_FALSE; 2264 2265 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 2266 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 2267 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 2268 if (flg) { 2269 PetscReal fact = 1.39; 2270 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2271 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 2272 if (fact <= 1.0) fact = 1.39; 2273 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2274 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2275 } 2276 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2277 PetscFunctionReturn(0); 2278 } 2279 2280 /*MC 2281 MATSBAIJ - MATSBAIJ = 
"sbaij" - A matrix type to be used for symmetric block sparse matrices. 2282 2283 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 2284 and MATMPISBAIJ otherwise. 2285 2286 Options Database Keys: 2287 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2288 2289 Level: beginner 2290 2291 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 2292 M*/ 2293 2294 /*@C 2295 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2296 the user should preallocate the matrix storage by setting the parameters 2297 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2298 performance can be increased by more than a factor of 50. 2299 2300 Collective on Mat 2301 2302 Input Parameters: 2303 + B - the matrix 2304 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2305 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2306 . d_nz - number of block nonzeros per block row in diagonal portion of local 2307 submatrix (same for all local rows) 2308 . d_nnz - array containing the number of block nonzeros in the various block rows 2309 in the upper triangular and diagonal part of the in diagonal portion of the local 2310 (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 2311 for the diagonal entry and set a value even if it is zero. 2312 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2313 submatrix (same for all local rows). 2314 - o_nnz - array containing the number of nonzeros in the various block rows of the 2315 off-diagonal portion of the local submatrix that is right of the diagonal 2316 (possibly different for each block row) or NULL. 2317 2318 2319 Options Database Keys: 2320 . 
-mat_no_unroll - uses code that does not unroll the loops in the 2321 block calculations (much slower) 2322 . -mat_block_size - size of the blocks to use 2323 2324 Notes: 2325 2326 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2327 than it must be used on all processors that share the object for that argument. 2328 2329 If the *_nnz parameter is given then the *_nz parameter is ignored 2330 2331 Storage Information: 2332 For a square global matrix we define each processor's diagonal portion 2333 to be its local rows and the corresponding columns (a square submatrix); 2334 each processor's off-diagonal portion encompasses the remainder of the 2335 local matrix (a rectangular submatrix). 2336 2337 The user can specify preallocated storage for the diagonal part of 2338 the local submatrix with either d_nz or d_nnz (not both). Set 2339 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2340 memory allocation. Likewise, specify preallocated storage for the 2341 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2342 2343 You can call MatGetInfo() to get information on how effective the preallocation was; 2344 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2345 You can also run with the option -info and look for messages with the string 2346 malloc in them to see if additional memory allocation was needed. 2347 2348 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2349 the figure below we depict these three local rows and all columns (0-11). 2350 2351 .vb 2352 0 1 2 3 4 5 6 7 8 9 10 11 2353 -------------------------- 2354 row 3 |. . . d d d o o o o o o 2355 row 4 |. . . d d d o o o o o o 2356 row 5 |. . . d d d o o o o o o 2357 -------------------------- 2358 .ve 2359 2360 Thus, any entries in the d locations are stored in the d (diagonal) 2361 submatrix, and any entries in the o locations are stored in the 2362 o (off-diagonal) submatrix. 
Note that the d matrix is stored in 2363 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2364 2365 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2366 plus the diagonal part of the d matrix, 2367 and o_nz should indicate the number of block nonzeros per row in the o matrix 2368 2369 In general, for PDE problems in which most nonzeros are near the diagonal, 2370 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2371 or you will get TERRIBLE performance; see the users' manual chapter on 2372 matrices. 2373 2374 Level: intermediate 2375 2376 .keywords: matrix, block, aij, compressed row, sparse, parallel 2377 2378 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 2379 @*/ 2380 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2381 { 2382 PetscErrorCode ierr; 2383 2384 PetscFunctionBegin; 2385 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2386 PetscValidType(B,1); 2387 PetscValidLogicalCollectiveInt(B,bs,2); 2388 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 2389 PetscFunctionReturn(0); 2390 } 2391 2392 /*@C 2393 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2394 (block compressed row). For good matrix assembly performance 2395 the user should preallocate the matrix storage by setting the parameters 2396 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2397 performance can be increased by more than a factor of 50. 2398 2399 Collective on MPI_Comm 2400 2401 Input Parameters: 2402 + comm - MPI communicator 2403 . bs - size of block, the blocks are ALWAYS square. 
One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2404 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2405 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2406 This value should be the same as the local size used in creating the 2407 y vector for the matrix-vector product y = Ax. 2408 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2409 This value should be the same as the local size used in creating the 2410 x vector for the matrix-vector product y = Ax. 2411 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2412 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2413 . d_nz - number of block nonzeros per block row in diagonal portion of local 2414 submatrix (same for all local rows) 2415 . d_nnz - array containing the number of block nonzeros in the various block rows 2416 in the upper triangular portion of the in diagonal portion of the local 2417 (possibly different for each block block row) or NULL. 2418 If you plan to factor the matrix you must leave room for the diagonal entry and 2419 set its value even if it is zero. 2420 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2421 submatrix (same for all local rows). 2422 - o_nnz - array containing the number of nonzeros in the various block rows of the 2423 off-diagonal portion of the local submatrix (possibly different for 2424 each block row) or NULL. 2425 2426 Output Parameter: 2427 . A - the matrix 2428 2429 Options Database Keys: 2430 . -mat_no_unroll - uses code that does not unroll the loops in the 2431 block calculations (much slower) 2432 . -mat_block_size - size of the blocks to use 2433 . 
-mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqBAIJ format on one processor)

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   The number of rows and columns must be divisible by blocksize.
   This matrix type does not support complex Hermitian operation.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |. . . d d d o o o o o o
   row 4  |. . . d d d o o o o o o
   row 5  |. . .
d d d o o o o o o 2473 -------------------------- 2474 .ve 2475 2476 Thus, any entries in the d locations are stored in the d (diagonal) 2477 submatrix, and any entries in the o locations are stored in the 2478 o (off-diagonal) submatrix. Note that the d matrix is stored in 2479 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2480 2481 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2482 plus the diagonal part of the d matrix, 2483 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2484 In general, for PDE problems in which most nonzeros are near the diagonal, 2485 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2486 or you will get TERRIBLE performance; see the users' manual chapter on 2487 matrices. 2488 2489 Level: intermediate 2490 2491 .keywords: matrix, block, aij, compressed row, sparse, parallel 2492 2493 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2494 @*/ 2495 2496 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2497 { 2498 PetscErrorCode ierr; 2499 PetscMPIInt size; 2500 2501 PetscFunctionBegin; 2502 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2503 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2504 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2505 if (size > 1) { 2506 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2507 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2508 } else { 2509 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2510 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2511 } 2512 PetscFunctionReturn(0); 2513 } 2514 2515 2516 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2517 { 2518 Mat mat; 2519 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2520 PetscErrorCode ierr; 
2521 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2522 PetscScalar *array; 2523 2524 PetscFunctionBegin; 2525 *newmat = 0; 2526 2527 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2528 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2529 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2530 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2531 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2532 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2533 2534 mat->factortype = matin->factortype; 2535 mat->preallocated = PETSC_TRUE; 2536 mat->assembled = PETSC_TRUE; 2537 mat->insertmode = NOT_SET_VALUES; 2538 2539 a = (Mat_MPISBAIJ*)mat->data; 2540 a->bs2 = oldmat->bs2; 2541 a->mbs = oldmat->mbs; 2542 a->nbs = oldmat->nbs; 2543 a->Mbs = oldmat->Mbs; 2544 a->Nbs = oldmat->Nbs; 2545 2546 2547 a->size = oldmat->size; 2548 a->rank = oldmat->rank; 2549 a->donotstash = oldmat->donotstash; 2550 a->roworiented = oldmat->roworiented; 2551 a->rowindices = 0; 2552 a->rowvalues = 0; 2553 a->getrowactive = PETSC_FALSE; 2554 a->barray = 0; 2555 a->rstartbs = oldmat->rstartbs; 2556 a->rendbs = oldmat->rendbs; 2557 a->cstartbs = oldmat->cstartbs; 2558 a->cendbs = oldmat->cendbs; 2559 2560 /* hash table stuff */ 2561 a->ht = 0; 2562 a->hd = 0; 2563 a->ht_size = 0; 2564 a->ht_flag = oldmat->ht_flag; 2565 a->ht_fact = oldmat->ht_fact; 2566 a->ht_total_ct = 0; 2567 a->ht_insert_ct = 0; 2568 2569 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2570 if (oldmat->colmap) { 2571 #if defined(PETSC_USE_CTABLE) 2572 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2573 #else 2574 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 2575 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2576 ierr = 
PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2577 #endif 2578 } else a->colmap = 0; 2579 2580 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2581 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2582 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2583 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2584 } else a->garray = 0; 2585 2586 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2587 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2588 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2589 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2590 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2591 2592 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2593 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2594 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2595 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2596 2597 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2598 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2599 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2600 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2601 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2602 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2603 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2604 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2605 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2606 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2607 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);

  /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  /* share the symmetric scatter with the original matrix via reference counting instead of copying it */
  ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
  a->sMvctx = oldmat->sMvctx;
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);

  /* duplicate the diagonal (A) and off-diagonal (B) sequential blocks; cpvalues controls value copying */
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

/*
   MatLoad_MPISBAIJ - Loads an MPISBAIJ matrix from a PETSc binary viewer.

   Rank 0 reads the file header, row lengths, column indices and values, and
   ships each rank its portion with MPI point-to-point messages; all ranks then
   insert their rows with MatSetValues.  If the global size is not divisible by
   the block size, extra identity rows/columns are appended as padding.
*/
PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
  PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
  PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
  PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp;
  int            fd;
  PetscBool      isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);

  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  if (bs < 0) bs = 1;   /* block size unset (negative) defaults to 1 */

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    /* only rank 0 touches the file; header = {classid, M, N, total-nnz-or-flag} */
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1];
  N = header[2];

  /* If global sizes are set, check if they are consistent with that given in the file */
  if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
  if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);

  if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);   /* == bs when M is divisible by bs, reset to 0 below */
  if (extra_rows == bs) extra_rows = 0;
  else Mbs++;
  if (extra_rows &&!rank) {
    ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
    /* distribute block rows as evenly as possible; the first Mbs%size ranks get one extra */
    mbs = Mbs/size + ((Mbs % size) > rank);
    m   = mbs*bs;
  } else { /* User Set */
    m   = newmat->rmap->n;
    mbs = m/bs;   /* NOTE(review): assumes user-set local size is a multiple of bs -- not checked here */
  }
  /* rowners[i]  = first block row owned by rank i (prefix sum of per-rank counts);
     browners[i] = same boundary in scalar (point) rows                          */
  ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
  ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;   /* padding rows hold a single (diagonal) entry */
    ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {   /* procs[0] */
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;   /* padding rows are not in the file; append them by hand */
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;   /* padding rows get a diagonal entry */
    }

    /* read in every ones (except the last) and ship off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {  /* procs[i], i>0 */
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) nz += locrowlens[i];
    ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
  ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
  ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  rowcount = 0;
  nzcount  = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs; /* block col. index */
        if (!mask[tmp]) {           /* count each block column only once per block row */
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
          else masked1[dcount++] = tmp; /* entry in diag portion */
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;  /* d_nzz[i] */
    odlens[i] = odcount; /* o_nzz[i] */

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }
  ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
  ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;   /* unit diagonal for padding rows */
    }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }

    /* read in other processors (except the last one) and ship out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      /* NOTE(review): relies on i == size-1 here, left over from the loop above (loop body may not have run when size == 2, but i == 1 == size-1 then, so the index is still correct) */
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);

  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);

    /* receive message of values*/
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }

  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*XXXXX@
   MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Input Parameters:
.  mat - the matrix
.
   fact - factor

   Not Collective on Mat, each process can have a different hash factor

   Level: advanced

  Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@XXXXX*/


/*
   MatGetRowMaxAbs_MPISBAIJ - Row-wise maximum absolute value for an MPISBAIJ matrix.

   Because only the upper triangle is stored, each entry of the off-diagonal
   block B contributes to two rows: the locally owned row (accumulated into va)
   and the symmetric row owned by a later rank (accumulated into work and then
   sent downstream with MPI_Send/MPI_Recv).

   The idx output (column of the max per row) is not implemented and is rejected.
*/
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
  /* diagonal block first: its row maxima go directly into v */
  ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  bs  = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;   /* raw BAIJ storage of the off-diagonal block B */
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed */
  ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);

  /* row_max for B */
  if (rank != size-1) {   /* the last rank owns no off-diagonal (strictly-upper) block */
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col  = bcol + kcol; /* local col index */
          /* NOTE(review): maps B's compacted column numbering to global columns by a
             fixed offset of rowners_bs[rank+1]; this assumes the off-diagonal columns
             start immediately after this rank's range -- confirm against garray usage */
          col += rowners_bs[rank+1]; /* global col index */
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow; /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;   /* symmetric contribution to my row */
            if (work[col] < atmp) work[col] = atmp;              /* contribution to the owner of 'col' */
          }
        }
        bj++;
      }
    }

    /* send values to its owners */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1]-rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    }
  }

  /* receive values */
  if (rank) {
    rvalues = work;   /* reuse work as the receive buffer; one message per lower-ranked sender */
    count   = rowners_bs[rank+1]-rowners_bs[rank];
    for (source=0; source<rank; source++) {
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
      /* process values */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatSOR_MPISBAIJ - SOR/SSOR relaxation for MPISBAIJ; only block size 1 is
   supported.  Uses the split slvec0/slvec1 work vectors and the symmetric
   scatter sMvctx to apply both triangles of the symmetric matrix from the
   stored upper triangle.
*/
PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
  PetscScalar       *x,*ptr,*from;
  Vec               bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    /* delegate directly to the diagonal block's SOR */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep can skip the off-process correction since x starts at zero */
      ierr =
(*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {

      /* lower triangular part: slvec0b = - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);

      /* overlap the scatter with releasing the array pointers */
      ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if (flag & SOR_EISENSTAT) {
    Vec               xx1;
    PetscBool         hasop;
    const PetscScalar *diag;
    PetscScalar       *sl,scale = (omega - 2.0)/omega;
    PetscInt          i,n;

    /* lazily create the persistent work vectors for the Eisenstat trick */
    if (!mat->xx1) {
      ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
      ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);

    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
    } else {
      /*
          These two lines are replaced by code that may be a bit faster for a good compiler
      ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
      */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
      if (omega == 1.0) {
        /* scale == -1 when omega == 1, so the fused loop saves one multiply per entry */
        for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply off-diagonal portion of matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

/*
   MatSOR_MPISBAIJ_2comm - Alternative SSOR implementation that uses the
   two-scatter (Mvctx) communication pattern instead of the merged sMvctx
   scatter used by MatSOR_MPISBAIJ.  Block size 1 only.
*/
PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero initial guess the first sweep needs no off-process data */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }
    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower diagonal part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      /* reverse scatter accumulates -B^T*xx into the owning ranks' bb1 */
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper diagonal part: bb1 = bb1 - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

/*@
     MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
         CSR format the local rows.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;


  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
  /* the CSR data is copied into PETSc's internal storage here */
  ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}


/*@C
   MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero) these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
@*/
PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatch to the type-specific implementation registered on B (no-op if none) */
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatCreateMPIMatConcatenateSeqMat_MPISBAIJ - Builds a parallel MPISBAIJ matrix
   whose local rows are those of the given sequential SBAIJ matrix on each rank.

   MAT_INITIAL_MATRIX runs a symbolic phase (preallocation from block-row
   nonzero counts) followed by the numeric phase; MAT_REUSE_MATRIX only
   re-inserts the values into *outmat.
*/
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data;
    PetscInt     *dnz,*onz,sum,bs,cbs,mbs,Nbs;   /* note: bs,cbs shadow the outer declarations in this phase */
    PetscInt     *bindx,rmax=a->rmax,j;

    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    mbs = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
    }
    /* Check sum(n) = Nbs */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs);

    /* exclusive prefix sum gives this rank's first global block row */
    ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= mbs;

    ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
    ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<mbs; i++) {
      ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
      nnz  = nnz/bs;   /* convert point counts/indices to block counts/indices */
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
    }
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscFree(bindx);CHKERRQ(ierr);

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);

  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;   /* global row index of local row i */
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}