1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 #if defined(PETSC_HAVE_ELEMENTAL) 8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 9 #endif 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 12 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 13 { 14 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 15 PetscErrorCode ierr; 16 17 PetscFunctionBegin; 18 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 19 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 20 PetscFunctionReturn(0); 21 } 22 23 #undef __FUNCT__ 24 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 25 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 26 { 27 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 28 PetscErrorCode ierr; 29 30 PetscFunctionBegin; 31 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 32 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 33 PetscFunctionReturn(0); 34 } 35 36 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 37 { \ 38 \ 39 brow = row/bs; \ 40 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 41 rmax = aimax[brow]; nrow = ailen[brow]; \ 42 bcol = col/bs; \ 43 ridx = row % bs; cidx = col % bs; \ 44 low = 0; high = nrow; \ 45 while (high-low > 3) { \ 46 t = (low+high)/2; \ 47 if (rp[t] > bcol) high = t; \ 48 else low = t; \ 49 } \ 50 for (_i=low; _i<high; _i++) { \ 51 if (rp[_i] > bcol) break; \ 52 if (rp[_i] == bcol) { \ 53 bap = ap + bs2*_i + bs*cidx + ridx; \ 54 if (addv == ADD_VALUES) *bap += value; \ 55 else *bap = value; \ 56 goto a_noinsert; \ 57 } \ 58 } \ 59 if (a->nonew == 1) goto a_noinsert; \ 60 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 61 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 62 N = nrow++ - 1; \ 63 /* shift up all the later entries in this row */ \ 64 for (ii=N; ii>=_i; ii--) { \ 65 rp[ii+1] = rp[ii]; \ 66 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 67 } \ 68 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 69 rp[_i] = bcol; \ 70 ap[bs2*_i + bs*cidx + ridx] = value; \ 71 A->nonzerostate++;\ 72 a_noinsert:; \ 73 ailen[brow] = nrow; \ 74 } 75 76 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 77 { \ 78 brow = row/bs; \ 79 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 80 rmax = bimax[brow]; nrow = bilen[brow]; \ 81 bcol = col/bs; \ 82 ridx = row % bs; cidx = col % bs; \ 83 low = 0; high = nrow; \ 84 while (high-low > 3) { \ 85 t = (low+high)/2; \ 86 if (rp[t] > bcol) high = t; \ 87 else low = t; \ 88 } \ 89 for (_i=low; _i<high; _i++) { \ 90 if (rp[_i] > bcol) break; \ 91 if (rp[_i] == bcol) { \ 92 bap = ap + bs2*_i + bs*cidx + ridx; \ 93 if (addv == ADD_VALUES) *bap += value; \ 94 else *bap = value; \ 95 goto b_noinsert; \ 96 } \ 97 } \ 98 if (b->nonew == 1) goto b_noinsert; \ 99 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 100 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 101 N = nrow++ - 1; \ 102 /* shift up all the later entries in this row */ \ 103 for (ii=N; ii>=_i; ii--) { \ 104 rp[ii+1] = rp[ii]; \ 105 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 106 } \ 107 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 108 rp[_i] = bcol; \ 109 ap[bs2*_i + bs*cidx + ridx] = value; \ 110 B->nonzerostate++;\ 111 b_noinsert:; \ 112 bilen[brow] = nrow; \ 113 } 114 115 /* Only add/insert a(i,j) with i<=j (blocks). 116 Any a(i,j) with i>j input by user is ingored. 117 */ 118 #undef __FUNCT__ 119 #define __FUNCT__ "MatSetValues_MPISBAIJ" 120 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 121 { 122 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 123 MatScalar value; 124 PetscBool roworiented = baij->roworiented; 125 PetscErrorCode ierr; 126 PetscInt i,j,row,col; 127 PetscInt rstart_orig=mat->rmap->rstart; 128 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 129 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 130 131 /* Some Variables required in the macro */ 132 Mat A = baij->A; 133 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 134 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 135 MatScalar *aa =a->a; 136 137 Mat B = baij->B; 138 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 139 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 140 MatScalar *ba =b->a; 141 142 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 143 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 144 MatScalar *ap,*bap; 145 146 /* for stash */ 147 PetscInt n_loc, *in_loc = NULL; 148 MatScalar *v_loc = NULL; 149 150 PetscFunctionBegin; 151 if (!baij->donotstash) { 152 if (n > baij->n_loc) { 153 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 154 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 155 ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr); 156 ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr); 157 158 baij->n_loc = n; 159 } 160 in_loc = baij->in_loc; 161 v_loc = baij->v_loc; 162 } 163 164 for (i=0; i<m; i++) { 165 if (im[i] < 0) continue; 166 #if defined(PETSC_USE_DEBUG) 167 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 168 #endif 169 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 170 row = im[i] - rstart_orig; /* local row index */ 171 for (j=0; j<n; j++) { 172 if (im[i]/bs > in[j]/bs) { 173 if (a->ignore_ltriangular) { 174 continue; /* ignore lower triangular blocks */ 175 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 176 } 177 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 178 col = in[j] - cstart_orig; /* local col index */ 179 brow = row/bs; bcol = col/bs; 180 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 181 if (roworiented) value = v[i*n+j]; 182 else value = v[i+j*m]; 183 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 184 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 185 } else if (in[j] < 0) continue; 186 #if defined(PETSC_USE_DEBUG) 187 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 188 #endif 189 else { /* off-diag entry (B) */ 190 if (mat->was_assembled) { 191 if (!baij->colmap) { 192 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 193 } 194 #if defined(PETSC_USE_CTABLE) 195 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 196 col = col - 1; 197 #else 198 col = baij->colmap[in[j]/bs] - 1; 199 #endif 200 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 201 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 202 col = in[j]; 203 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 204 B = baij->B; 205 b = (Mat_SeqBAIJ*)(B)->data; 206 bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 207 ba = b->a; 208 } else col += in[j]%bs; 209 } else col = in[j]; 210 if (roworiented) value = v[i*n+j]; 211 else value = v[i+j*m]; 212 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 213 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 214 } 215 } 216 } else { /* off processor entry */ 217 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 218 if (!baij->donotstash) { 219 mat->assembled = PETSC_FALSE; 220 n_loc = 0; 221 for (j=0; j<n; j++) { 222 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 223 in_loc[n_loc] = in[j]; 224 if (roworiented) { 225 v_loc[n_loc] = v[i*n+j]; 226 } else { 227 v_loc[n_loc] = v[j*m+i]; 228 } 229 n_loc++; 230 } 231 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ_Inlined" 240 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 241 { 242 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 243 PetscErrorCode ierr; 244 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 245 PetscInt *imax =a->imax,*ai=a->i,*ailen=a->ilen; 246 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 247 PetscBool roworiented=a->roworiented; 248 const PetscScalar *value = v; 249 MatScalar *ap,*aa = a->a,*bap; 250 251 PetscFunctionBegin; 252 if (col < row) { 253 if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */ 254 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 255 } 256 rp = aj + ai[row]; 257 ap = aa + bs2*ai[row]; 258 rmax = imax[row]; 259 nrow = ailen[row]; 260 value = v; 261 low = 0; 262 high = nrow; 263 264 while (high-low > 7) { 265 t = (low+high)/2; 266 if (rp[t] > col) high = t; 267 else low = t; 268 } 269 for (i=low; i<high; i++) { 270 if (rp[i] > col) break; 271 if (rp[i] == col) { 272 bap = ap + bs2*i; 273 if (roworiented) { 274 if (is == ADD_VALUES) { 275 for (ii=0; ii<bs; ii++) { 276 for (jj=ii; jj<bs2; jj+=bs) { 277 bap[jj] += *value++; 278 } 279 } 280 } else { 281 for (ii=0; ii<bs; ii++) { 282 for (jj=ii; jj<bs2; jj+=bs) { 283 bap[jj] = *value++; 284 } 285 } 286 } 287 } else { 288 if (is == ADD_VALUES) { 289 for (ii=0; ii<bs; ii++) { 290 for (jj=0; jj<bs; jj++) { 291 *bap++ += *value++; 292 } 293 } 294 } else { 295 for (ii=0; ii<bs; ii++) { 296 for (jj=0; jj<bs; jj++) { 297 *bap++ = *value++; 298 } 299 } 300 } 301 } 302 goto noinsert2; 303 } 304 } 305 if (nonew == 1) goto noinsert2; 306 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol); 307 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 308 N = nrow++ - 1; high++; 309 /* shift up all the later entries in this row */ 310 for (ii=N; ii>=i; ii--) { 311 rp[ii+1] = rp[ii]; 312 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 313 } 314 if (N >= i) { 315 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 316 } 317 rp[i] = col; 318 bap = ap + bs2*i; 319 if (roworiented) { 320 for (ii=0; ii<bs; ii++) { 321 for (jj=ii; jj<bs2; jj+=bs) { 322 bap[jj] = *value++; 323 } 324 } 325 } else { 326 for (ii=0; ii<bs; ii++) { 327 for (jj=0; jj<bs; jj++) { 328 *bap++ = *value++; 329 } 330 } 331 } 332 noinsert2:; 333 ailen[row] = nrow; 334 PetscFunctionReturn(0); 335 } 336 337 #undef __FUNCT__ 338 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined" 339 /* 340 This routine is exactly duplicated in mpibaij.c 341 */ 342 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 343 { 344 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 345 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 346 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 347 PetscErrorCode ierr; 348 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 349 PetscBool roworiented=a->roworiented; 350 const PetscScalar *value = v; 351 MatScalar *ap,*aa = a->a,*bap; 352 353 PetscFunctionBegin; 354 rp = aj + ai[row]; 355 ap = aa + bs2*ai[row]; 356 rmax = imax[row]; 357 nrow = ailen[row]; 358 low = 0; 359 high = nrow; 360 value = v; 361 while (high-low > 7) { 362 t = (low+high)/2; 363 if (rp[t] > col) high = t; 364 else low = t; 365 } 366 for (i=low; i<high; i++) { 367 if (rp[i] > col) break; 368 if (rp[i] == col) { 369 bap = ap + bs2*i; 370 if (roworiented) { 371 if (is == ADD_VALUES) { 372 for (ii=0; ii<bs; ii++) { 373 for (jj=ii; jj<bs2; jj+=bs) { 374 bap[jj] += *value++; 375 } 376 } 377 } else { 378 for (ii=0; ii<bs; ii++) { 379 for (jj=ii; jj<bs2; jj+=bs) { 380 bap[jj] = *value++; 381 } 382 } 383 } 384 } else { 385 if (is == ADD_VALUES) { 386 for (ii=0; ii<bs; ii++,value+=bs) { 387 for (jj=0; jj<bs; jj++) { 388 bap[jj] += value[jj]; 389 } 390 bap += bs; 391 } 392 } else { 393 for (ii=0; ii<bs; ii++,value+=bs) { 394 for (jj=0; jj<bs; jj++) { 395 bap[jj] = value[jj]; 396 } 397 bap += bs; 398 } 399 } 400 } 401 goto noinsert2; 402 } 403 } 404 if (nonew == 1) goto noinsert2; 405 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol); 406 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 407 N = nrow++ - 1; high++; 408 /* shift up all the later entries in this row */ 409 for (ii=N; ii>=i; ii--) { 410 rp[ii+1] = rp[ii]; 411 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 412 } 413 if (N >= i) { 414 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 415 } 416 rp[i] = col; 417 bap = ap + bs2*i; 418 if (roworiented) { 419 for (ii=0; ii<bs; ii++) { 420 for (jj=ii; jj<bs2; jj+=bs) { 421 bap[jj] = *value++; 422 } 423 } 424 } else { 425 for (ii=0; ii<bs; ii++) { 426 for (jj=0; jj<bs; jj++) { 427 *bap++ = *value++; 428 } 429 } 430 } 431 noinsert2:; 432 ailen[row] = nrow; 433 PetscFunctionReturn(0); 434 } 435 436 #undef __FUNCT__ 437 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 438 /* 439 This routine could be optimized by removing the need for the block copy below and passing stride information 440 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 441 */ 442 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 443 { 444 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 445 const MatScalar *value; 446 MatScalar *barray =baij->barray; 447 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 448 PetscErrorCode ierr; 449 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 450 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 451 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 452 453 PetscFunctionBegin; 454 if (!barray) { 455 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 456 baij->barray = barray; 457 } 458 459 if (roworiented) { 460 stepval = (n-1)*bs; 461 } else { 462 stepval = (m-1)*bs; 463 } 464 for (i=0; i<m; i++) { 465 if (im[i] < 0) continue; 466 #if defined(PETSC_USE_DEBUG) 467 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1); 468 #endif 469 if (im[i] >= rstart && im[i] < rend) { 470 row = im[i] - rstart; 471 for (j=0; j<n; j++) { 472 if (im[i] > in[j]) { 473 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 474 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 475 } 476 /* If NumCol = 1 then a copy is not required */ 477 if ((roworiented) && (n == 1)) { 478 barray = (MatScalar*) v + i*bs2; 479 } else if ((!roworiented) && (m == 1)) { 480 barray = (MatScalar*) v + j*bs2; 481 } else { /* Here a copy is required */ 482 if (roworiented) { 483 value = v + i*(stepval+bs)*bs + j*bs; 484 } else { 485 value = v + j*(stepval+bs)*bs + i*bs; 486 } 487 for (ii=0; ii<bs; ii++,value+=stepval) { 488 for (jj=0; jj<bs; jj++) { 489 *barray++ = *value++; 490 } 491 } 492 barray -=bs2; 493 } 494 495 if (in[j] >= cstart && in[j] < cend) { 496 col = in[j] - cstart; 497 ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 498 } else if (in[j] < 0) continue; 499 #if defined(PETSC_USE_DEBUG) 500 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1); 501 #endif 502 else { 503 if (mat->was_assembled) { 504 if (!baij->colmap) { 505 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 506 } 507 508 #if defined(PETSC_USE_DEBUG) 509 #if defined(PETSC_USE_CTABLE) 510 { PetscInt data; 511 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 512 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 513 } 514 #else 515 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 516 #endif 517 #endif 518 #if defined(PETSC_USE_CTABLE) 519 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 520 col = (col - 1)/bs; 521 #else 522 col = (baij->colmap[in[j]] - 1)/bs; 523 #endif 524 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 525 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 526 col = in[j]; 527 } 528 } else col = in[j]; 529 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 530 } 531 } 532 } else { 533 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 534 if (!baij->donotstash) { 535 if (roworiented) { 536 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 537 } else { 538 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 539 } 540 } 541 } 542 } 543 PetscFunctionReturn(0); 544 } 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatGetValues_MPISBAIJ" 548 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 549 { 550 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 551 PetscErrorCode ierr; 552 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 553 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 554 555 PetscFunctionBegin; 556 for (i=0; i<m; i++) { 557 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 558 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 559 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 560 row = idxm[i] - bsrstart; 561 for (j=0; j<n; j++) { 562 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 563 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 564 if (idxn[j] >= bscstart && idxn[j] < bscend) { 565 col = idxn[j] - bscstart; 566 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 567 } else { 568 if (!baij->colmap) { 569 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 570 } 571 #if defined(PETSC_USE_CTABLE) 572 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 573 data--; 574 #else 575 data = baij->colmap[idxn[j]/bs]-1; 576 #endif 577 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 578 else { 579 col = data + idxn[j]%bs; 580 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 581 } 582 } 583 } 584 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 585 } 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatNorm_MPISBAIJ" 591 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 592 { 593 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 594 PetscErrorCode ierr; 595 PetscReal sum[2],*lnorm2; 596 597 PetscFunctionBegin; 598 if (baij->size == 1) { 599 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 600 } else { 601 if (type == NORM_FROBENIUS) { 602 ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr); 603 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 604 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 605 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 606 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 607 ierr = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 608 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 609 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 610 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 611 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 612 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 613 PetscReal *rsum,*rsum2,vabs; 614 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 615 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 616 MatScalar *v; 617 618 ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr); 619 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 620 /* Amat */ 621 v = amat->a; jj = amat->j; 622 for (brow=0; brow<mbs; brow++) { 623 grow = bs*(rstart + brow); 624 nz = amat->i[brow+1] - amat->i[brow]; 625 for (bcol=0; bcol<nz; bcol++) { 626 gcol = bs*(rstart + *jj); jj++; 627 for (col=0; col<bs; col++) { 628 for (row=0; row<bs; row++) { 629 vabs = PetscAbsScalar(*v); v++; 630 rsum[gcol+col] += vabs; 631 /* non-diagonal block */ 632 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 633 } 634 } 635 } 636 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 637 } 638 /* Bmat */ 639 v = bmat->a; jj = bmat->j; 640 for (brow=0; brow<mbs; brow++) { 641 grow = bs*(rstart + brow); 642 nz = bmat->i[brow+1] - bmat->i[brow]; 643 for (bcol=0; bcol<nz; bcol++) { 644 gcol = bs*garray[*jj]; jj++; 645 for (col=0; col<bs; col++) { 646 for (row=0; row<bs; row++) { 647 vabs = PetscAbsScalar(*v); v++; 648 rsum[gcol+col] += vabs; 649 rsum[grow+row] += vabs; 650 } 651 } 652 } 653 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 654 } 655 ierr = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 656 *norm = 0.0; 657 for (col=0; col<mat->cmap->N; col++) { 658 if (rsum2[col] > *norm) *norm = rsum2[col]; 659 } 660 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 661 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 662 } 663 PetscFunctionReturn(0); 664 } 665 666 #undef __FUNCT__ 667 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 668 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 669 { 670 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 671 PetscErrorCode ierr; 672 PetscInt nstash,reallocs; 673 674 PetscFunctionBegin; 675 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 676 677 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 678 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 679 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 680 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 681 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 682 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 683 PetscFunctionReturn(0); 684 } 685 686 #undef __FUNCT__ 687 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 688 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 689 { 690 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 691 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data; 692 PetscErrorCode ierr; 693 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 694 PetscInt *row,*col; 695 PetscBool other_disassembled; 696 PetscMPIInt n; 697 PetscBool r1,r2,r3; 698 MatScalar *val; 699 700 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 701 PetscFunctionBegin; 702 if (!baij->donotstash && !mat->nooffprocentries) { 703 while (1) { 704 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 705 if (!flg) break; 706 707 for (i=0; i<n;) { 708 /* Now identify the consecutive vals belonging to the same row */ 709 for (j=i,rstart=row[j]; j<n; j++) { 710 if (row[j] != rstart) break; 711 } 712 if (j < n) ncols = j-i; 713 else ncols = n-i; 714 /* Now assemble all these values with a single function call */ 715 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 716 i = j; 717 } 718 } 719 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 720 /* Now process the block-stash. Since the values are stashed column-oriented, 721 set the roworiented flag to column oriented, and after MatSetValues() 722 restore the original flags */ 723 r1 = baij->roworiented; 724 r2 = a->roworiented; 725 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 726 727 baij->roworiented = PETSC_FALSE; 728 a->roworiented = PETSC_FALSE; 729 730 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 731 while (1) { 732 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 733 if (!flg) break; 734 735 for (i=0; i<n;) { 736 /* Now identify the consecutive vals belonging to the same row */ 737 for (j=i,rstart=row[j]; j<n; j++) { 738 if (row[j] != rstart) break; 739 } 740 if (j < n) ncols = j-i; 741 else ncols = n-i; 742 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr); 743 i = j; 744 } 745 } 746 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 747 748 baij->roworiented = r1; 749 a->roworiented = r2; 750 751 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 752 } 753 754 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 755 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 756 757 /* determine if any processor has disassembled, if so we must 758 also disassemble ourselfs, in order that we may reassemble. */ 759 /* 760 if nonzero structure of submatrix B cannot change then we know that 761 no processor disassembled thus we can skip this stuff 762 */ 763 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 764 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 765 if (mat->was_assembled && !other_disassembled) { 766 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 767 } 768 } 769 770 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 771 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 772 } 773 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 774 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 775 776 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 777 778 baij->rowvalues = 0; 779 780 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 781 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 782 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 783 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 784 } 785 PetscFunctionReturn(0); 786 } 787 788 extern PetscErrorCode MatView_SeqSBAIJ(Mat,PetscViewer); 789 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 790 #include <petscdraw.h> 791 #undef __FUNCT__ 792 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 793 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 794 { 795 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 796 PetscErrorCode ierr; 797 PetscInt bs = mat->rmap->bs; 798 PetscMPIInt rank = baij->rank; 799 PetscBool iascii,isdraw; 800 PetscViewer sviewer; 801 PetscViewerFormat format; 802 803 PetscFunctionBegin; 804 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 805 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 806 if (iascii) { 807 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 808 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 809 MatInfo info; 810 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 811 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 812 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 813 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 814 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 815 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 816 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 817 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 818 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 819 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 820 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 821 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 822 PetscFunctionReturn(0); 823 } else if (format == PETSC_VIEWER_ASCII_INFO) { 824 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 825 PetscFunctionReturn(0); 826 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 827 PetscFunctionReturn(0); 828 } 829 } 830 831 if (isdraw) { 832 PetscDraw draw; 833 PetscBool isnull; 834 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 835 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 836 if (isnull) PetscFunctionReturn(0); 837 } 838 839 { 840 /* assemble the entire matrix onto first processor. */ 841 Mat A; 842 Mat_SeqSBAIJ *Aloc; 843 Mat_SeqBAIJ *Bloc; 844 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 845 MatScalar *a; 846 const char *matname; 847 848 /* Should this be the same type as mat? */ 849 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 850 if (!rank) { 851 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 852 } else { 853 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 854 } 855 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 856 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 857 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 858 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 859 860 /* copy over the A part */ 861 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 862 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 863 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 864 865 for (i=0; i<mbs; i++) { 866 rvals[0] = bs*(baij->rstartbs + i); 867 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 868 for (j=ai[i]; j<ai[i+1]; j++) { 869 col = (baij->cstartbs+aj[j])*bs; 870 for (k=0; k<bs; k++) { 871 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 872 col++; 873 a += bs; 874 } 875 } 876 } 877 /* copy over the B part */ 878 Bloc = (Mat_SeqBAIJ*)baij->B->data; 879 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 880 for (i=0; i<mbs; i++) { 881 882 rvals[0] = bs*(baij->rstartbs + i); 883 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 884 for (j=ai[i]; j<ai[i+1]; j++) { 885 col = baij->garray[aj[j]]*bs; 886 for (k=0; k<bs; k++) { 887 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 888 col++; 889 a += bs; 890 } 891 } 892 } 893 ierr = PetscFree(rvals);CHKERRQ(ierr); 894 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 895 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 896 /* 897 Everyone has to call to draw the matrix since the graphics waits are 898 synchronized across all processors that share the PetscDraw object 899 */ 900 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 901 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 902 if (!rank) { 903 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 904 ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 905 } 906 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 907 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 908 ierr = MatDestroy(&A);CHKERRQ(ierr); 909 } 910 PetscFunctionReturn(0); 911 } 912 913 #undef __FUNCT__ 914 #define __FUNCT__ "MatView_MPISBAIJ" 915 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 916 { 917 PetscErrorCode ierr; 918 PetscBool iascii,isdraw,issocket,isbinary; 919 920 PetscFunctionBegin; 921 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 922 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 923 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 924 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 925 if (iascii || isdraw || issocket || isbinary) { 926 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 927 } 928 PetscFunctionReturn(0); 929 } 930 931 #undef __FUNCT__ 932 #define __FUNCT__ "MatDestroy_MPISBAIJ" 933 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 934 { 935 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 936 PetscErrorCode ierr; 937 938 PetscFunctionBegin; 939 #if defined(PETSC_USE_LOG) 940 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 941 #endif 942 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 943 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 944 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 945 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 946 #if defined(PETSC_USE_CTABLE) 947 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 948 #else 949 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 950 #endif 951 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 952 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 953 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 954 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 955 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 956 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 957 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 958 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 959 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 960 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 961 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 962 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 963 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 964 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 965 ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 966 #if defined(PETSC_USE_REAL_MAT_SINGLE) 967 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 968 #endif 969 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 970 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 971 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 972 ierr = PetscFree(mat->data);CHKERRQ(ierr); 973 974 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 975 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 976 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 977 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 978 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 979 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr); 980 #if defined(PETSC_HAVE_ELEMENTAL) 981 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr); 982 #endif 983 PetscFunctionReturn(0); 984 } 985 986 #undef __FUNCT__ 987 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian" 988 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 989 { 990 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 991 PetscErrorCode ierr; 992 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 993 PetscScalar *from; 994 const PetscScalar *x; 995 996 PetscFunctionBegin; 997 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 998 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 999 1000 /* diagonal part */ 1001 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1002 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1003 1004 /* subdiagonal part */ 1005 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1006 1007 /* copy x into the vec slvec0 */ 1008 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1009 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1010 1011 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1012 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1013 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1014 1015 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1016 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1017 /* supperdiagonal part */ 1018 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1019 PetscFunctionReturn(0); 1020 } 1021 1022 #undef __FUNCT__ 1023 #define __FUNCT__ "MatMult_MPISBAIJ" 1024 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 1025 { 1026 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1027 PetscErrorCode ierr; 1028 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 1029 PetscScalar *from; 1030 const PetscScalar *x; 1031 1032 PetscFunctionBegin; 1033 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1034 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1035 1036 /* diagonal part */ 1037 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1038 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1039 1040 /* subdiagonal part */ 1041 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1042 1043 /* copy x into the vec slvec0 */ 1044 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1045 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1046 1047 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1048 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1049 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1050 1051 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1052 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1053 /* supperdiagonal part */ 1054 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1055 PetscFunctionReturn(0); 1056 } 1057 1058 #undef __FUNCT__ 1059 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 1060 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 1061 { 1062 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1063 PetscErrorCode ierr; 1064 PetscInt nt; 1065 1066 PetscFunctionBegin; 1067 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1068 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1069 1070 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1071 if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1072 1073 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1074 /* do diagonal part */ 1075 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1076 /* do supperdiagonal part */ 1077 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1078 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1079 /* do subdiagonal part */ 1080 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1081 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1082 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1083 PetscFunctionReturn(0); 1084 } 1085 1086 #undef __FUNCT__ 1087 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 1088 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1089 { 1090 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1091 PetscErrorCode ierr; 1092 PetscInt mbs=a->mbs,bs=A->rmap->bs; 1093 PetscScalar *from,zero=0.0; 1094 const PetscScalar *x; 1095 1096 PetscFunctionBegin; 1097 /* 1098 PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n"); 1099 PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT); 1100 */ 1101 /* diagonal part */ 1102 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1103 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 1104 1105 /* subdiagonal part */ 1106 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1107 1108 /* copy x into the vec slvec0 */ 1109 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1110 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1111 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1112 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1113 1114 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1115 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1116 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1117 1118 /* supperdiagonal part */ 1119 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 1125 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1126 { 1127 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1128 PetscErrorCode ierr; 1129 1130 PetscFunctionBegin; 1131 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1132 /* do diagonal part */ 1133 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1134 /* do supperdiagonal part */ 1135 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1136 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1137 1138 /* do subdiagonal part */ 1139 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1140 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1141 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 /* 1146 This only works correctly for square matrices where the subblock A->A is the 1147 diagonal block 1148 */ 1149 #undef __FUNCT__ 1150 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 1151 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1152 { 1153 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1154 PetscErrorCode ierr; 1155 1156 PetscFunctionBegin; 1157 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1158 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1159 PetscFunctionReturn(0); 1160 } 1161 1162 #undef __FUNCT__ 1163 #define __FUNCT__ "MatScale_MPISBAIJ" 1164 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1165 { 1166 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1167 PetscErrorCode ierr; 1168 1169 PetscFunctionBegin; 1170 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1171 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1172 PetscFunctionReturn(0); 1173 } 1174 1175 #undef __FUNCT__ 1176 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1177 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1178 { 1179 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1180 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1181 PetscErrorCode ierr; 1182 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1183 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1184 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1185 1186 PetscFunctionBegin; 1187 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1188 mat->getrowactive = PETSC_TRUE; 1189 1190 if (!mat->rowvalues && (idx || v)) { 1191 /* 1192 allocate enough space to hold information from the longest row. 1193 */ 1194 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1195 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1196 PetscInt max = 1,mbs = mat->mbs,tmp; 1197 for (i=0; i<mbs; i++) { 1198 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1199 if (max < tmp) max = tmp; 1200 } 1201 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1202 } 1203 1204 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1205 lrow = row - brstart; /* local row index */ 1206 1207 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1208 if (!v) {pvA = 0; pvB = 0;} 1209 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1210 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1211 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1212 nztot = nzA + nzB; 1213 1214 cmap = mat->garray; 1215 if (v || idx) { 1216 if (nztot) { 1217 /* Sort by increasing column numbers, assuming A and B already sorted */ 1218 PetscInt imark = -1; 1219 if (v) { 1220 *v = v_p = mat->rowvalues; 1221 for (i=0; i<nzB; i++) { 1222 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1223 else break; 1224 } 1225 imark = i; 1226 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1227 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1228 } 1229 if (idx) { 1230 *idx = idx_p = mat->rowindices; 1231 if (imark > -1) { 1232 for (i=0; i<imark; i++) { 1233 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1234 } 1235 } else { 1236 for (i=0; i<nzB; i++) { 1237 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1238 else break; 1239 } 1240 imark = i; 1241 } 1242 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1243 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1244 } 1245 } else { 1246 if (idx) *idx = 0; 1247 if (v) *v = 0; 1248 } 1249 } 1250 *nz = nztot; 1251 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1252 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1253 PetscFunctionReturn(0); 1254 } 1255 1256 #undef __FUNCT__ 1257 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1258 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1259 { 1260 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1261 1262 PetscFunctionBegin; 1263 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1264 baij->getrowactive = PETSC_FALSE; 1265 PetscFunctionReturn(0); 1266 } 1267 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1270 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1271 { 1272 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1273 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1274 1275 PetscFunctionBegin; 1276 aA->getrow_utriangular = PETSC_TRUE; 1277 PetscFunctionReturn(0); 1278 } 1279 #undef __FUNCT__ 1280 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1281 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1282 { 1283 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1284 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1285 1286 PetscFunctionBegin; 1287 aA->getrow_utriangular = PETSC_FALSE; 1288 PetscFunctionReturn(0); 1289 } 1290 1291 #undef __FUNCT__ 1292 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1293 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1294 { 1295 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1296 PetscErrorCode ierr; 1297 1298 PetscFunctionBegin; 1299 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1300 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1301 PetscFunctionReturn(0); 1302 } 1303 1304 #undef __FUNCT__ 1305 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1306 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1307 { 1308 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1309 PetscErrorCode ierr; 1310 1311 PetscFunctionBegin; 1312 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1313 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1314 PetscFunctionReturn(0); 1315 } 1316 1317 /* Check if isrow is a subset of iscol_local, called by MatGetSubMatrix_MPISBAIJ() 1318 Input: isrow - distributed(parallel), 1319 iscol_local - locally owned (seq) 1320 */ 1321 #undef __FUNCT__ 1322 #define __FUNCT__ "ISEqual_private" 1323 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg) 1324 { 1325 PetscErrorCode ierr; 1326 PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch; 1327 const PetscInt *ptr1,*ptr2; 1328 1329 PetscFunctionBegin; 1330 ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr); 1331 ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr); 1332 if (sz1 > sz2) { 1333 *flg = PETSC_FALSE; 1334 PetscFunctionReturn(0); 1335 } 1336 1337 ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr); 1338 ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1339 1340 ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr); 1341 ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr); 1342 ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr); 1343 ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr); 1344 ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr); 1345 ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr); 1346 1347 nmatch=0; 1348 k = 0; 1349 for (i=0; i<sz1; i++){ 1350 for (j=k; j<sz2; j++){ 1351 if (a1[i] == a2[j]) { 1352 k = j; nmatch++; 1353 break; 1354 } 1355 } 1356 } 1357 ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr); 1358 ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1359 ierr = PetscFree(a1);CHKERRQ(ierr); 1360 ierr = PetscFree(a2);CHKERRQ(ierr); 1361 if (nmatch < sz1) { 1362 *flg = PETSC_FALSE; 1363 } else { 1364 *flg = PETSC_TRUE; 1365 } 1366 PetscFunctionReturn(0); 1367 } 1368 1369 #undef __FUNCT__ 1370 #define __FUNCT__ "MatGetSubMatrix_MPISBAIJ" 1371 PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1372 { 1373 PetscErrorCode ierr; 1374 IS iscol_local; 1375 PetscInt csize; 1376 PetscBool isequal; 1377 1378 PetscFunctionBegin; 1379 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1380 if (call == MAT_REUSE_MATRIX) { 1381 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1382 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1383 } else { 1384 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1385 ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr); 1386 if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow"); 1387 } 1388 1389 /* now call MatGetSubMatrix_MPIBAIJ() */ 1390 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1391 if (call == MAT_INITIAL_MATRIX) { 1392 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1393 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1394 } 1395 PetscFunctionReturn(0); 1396 } 1397 1398 #undef __FUNCT__ 1399 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1400 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1401 { 1402 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1403 PetscErrorCode ierr; 1404 1405 PetscFunctionBegin; 1406 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1407 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1408 PetscFunctionReturn(0); 1409 } 1410 1411 #undef __FUNCT__ 1412 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1413 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1414 { 1415 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1416 Mat A = a->A,B = a->B; 1417 PetscErrorCode ierr; 1418 PetscReal isend[5],irecv[5]; 1419 1420 PetscFunctionBegin; 1421 info->block_size = (PetscReal)matin->rmap->bs; 1422 1423 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1424 1425 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1426 isend[3] = info->memory; isend[4] = info->mallocs; 1427 1428 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1429 1430 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1431 isend[3] += info->memory; isend[4] += info->mallocs; 1432 if (flag == MAT_LOCAL) { 1433 info->nz_used = isend[0]; 1434 info->nz_allocated = isend[1]; 1435 info->nz_unneeded = isend[2]; 1436 info->memory = isend[3]; 1437 info->mallocs = isend[4]; 1438 } else if (flag == MAT_GLOBAL_MAX) { 1439 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1440 1441 info->nz_used = irecv[0]; 1442 info->nz_allocated = irecv[1]; 1443 info->nz_unneeded = irecv[2]; 1444 info->memory = irecv[3]; 1445 info->mallocs = irecv[4]; 1446 } else if (flag == MAT_GLOBAL_SUM) { 1447 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1448 1449 info->nz_used = irecv[0]; 1450 info->nz_allocated = irecv[1]; 1451 info->nz_unneeded = irecv[2]; 1452 info->memory = irecv[3]; 1453 info->mallocs = irecv[4]; 1454 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1455 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1456 info->fill_ratio_needed = 0; 1457 info->factor_mallocs = 0; 1458 PetscFunctionReturn(0); 1459 } 1460 1461 #undef __FUNCT__ 1462 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1463 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1464 { 1465 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1466 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1467 PetscErrorCode ierr; 1468 1469 PetscFunctionBegin; 1470 switch (op) { 1471 case MAT_NEW_NONZERO_LOCATIONS: 1472 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1473 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1474 case MAT_KEEP_NONZERO_PATTERN: 1475 case MAT_NEW_NONZERO_LOCATION_ERR: 1476 MatCheckPreallocated(A,1); 1477 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1478 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1479 break; 1480 case MAT_ROW_ORIENTED: 1481 MatCheckPreallocated(A,1); 1482 a->roworiented = flg; 1483 1484 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1485 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1486 break; 1487 case MAT_NEW_DIAGONALS: 1488 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1489 break; 1490 case MAT_IGNORE_OFF_PROC_ENTRIES: 1491 a->donotstash = flg; 1492 break; 1493 case MAT_USE_HASH_TABLE: 1494 a->ht_flag = flg; 1495 break; 1496 case MAT_HERMITIAN: 1497 MatCheckPreallocated(A,1); 1498 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1499 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1500 1501 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1502 break; 1503 case MAT_SPD: 1504 A->spd_set = PETSC_TRUE; 1505 A->spd = flg; 1506 if (flg) { 1507 A->symmetric = PETSC_TRUE; 1508 A->structurally_symmetric = PETSC_TRUE; 1509 A->symmetric_set = PETSC_TRUE; 1510 A->structurally_symmetric_set = PETSC_TRUE; 1511 } 1512 break; 1513 case MAT_SYMMETRIC: 1514 MatCheckPreallocated(A,1); 1515 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1516 break; 1517 case MAT_STRUCTURALLY_SYMMETRIC: 1518 MatCheckPreallocated(A,1); 1519 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1520 break; 1521 case MAT_SYMMETRY_ETERNAL: 1522 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1523 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1524 break; 1525 case MAT_IGNORE_LOWER_TRIANGULAR: 1526 aA->ignore_ltriangular = flg; 1527 break; 1528 case MAT_ERROR_LOWER_TRIANGULAR: 1529 aA->ignore_ltriangular = flg; 1530 break; 1531 case MAT_GETROW_UPPERTRIANGULAR: 1532 aA->getrow_utriangular = flg; 1533 break; 1534 default: 1535 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1536 } 1537 PetscFunctionReturn(0); 1538 } 1539 1540 #undef __FUNCT__ 1541 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1542 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1543 { 1544 PetscErrorCode ierr; 1545 1546 PetscFunctionBegin; 1547 if (MAT_INITIAL_MATRIX || *B != A) { 1548 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1549 } 1550 PetscFunctionReturn(0); 1551 } 1552 1553 #undef __FUNCT__ 1554 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1555 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1556 { 1557 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1558 Mat a = baij->A, b=baij->B; 1559 PetscErrorCode ierr; 1560 PetscInt nv,m,n; 1561 PetscBool flg; 1562 1563 PetscFunctionBegin; 1564 if (ll != rr) { 1565 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1566 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1567 } 1568 if (!ll) PetscFunctionReturn(0); 1569 1570 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1571 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1572 1573 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1574 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1575 1576 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1577 1578 /* left diagonalscale the off-diagonal part */ 1579 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1580 1581 /* scale the diagonal part */ 1582 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1583 1584 /* right diagonalscale the off-diagonal part */ 1585 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1586 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1587 PetscFunctionReturn(0); 1588 } 1589 1590 #undef __FUNCT__ 1591 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1592 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1593 { 1594 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1595 PetscErrorCode ierr; 1596 1597 PetscFunctionBegin; 1598 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1599 PetscFunctionReturn(0); 1600 } 1601 1602 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1603 1604 #undef __FUNCT__ 1605 #define __FUNCT__ "MatEqual_MPISBAIJ" 1606 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1607 { 1608 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1609 Mat a,b,c,d; 1610 PetscBool flg; 1611 PetscErrorCode ierr; 1612 1613 PetscFunctionBegin; 1614 a = matA->A; b = matA->B; 1615 c = matB->A; d = matB->B; 1616 1617 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1618 if (flg) { 1619 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1620 } 1621 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1622 PetscFunctionReturn(0); 1623 } 1624 1625 #undef __FUNCT__ 1626 #define __FUNCT__ "MatCopy_MPISBAIJ" 1627 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1628 { 1629 PetscErrorCode ierr; 1630 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1631 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1632 1633 PetscFunctionBegin; 1634 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1635 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1636 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1637 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1638 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1639 } else { 1640 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1641 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1642 } 1643 PetscFunctionReturn(0); 1644 } 1645 1646 #undef __FUNCT__ 1647 #define __FUNCT__ "MatSetUp_MPISBAIJ" 1648 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1649 { 1650 PetscErrorCode ierr; 1651 1652 PetscFunctionBegin; 1653 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1654 PetscFunctionReturn(0); 1655 } 1656 1657 #undef __FUNCT__ 1658 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1659 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1660 { 1661 PetscErrorCode ierr; 1662 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1663 PetscBLASInt bnz,one=1; 1664 Mat_SeqSBAIJ *xa,*ya; 1665 Mat_SeqBAIJ *xb,*yb; 1666 1667 PetscFunctionBegin; 1668 if (str == SAME_NONZERO_PATTERN) { 1669 PetscScalar alpha = a; 1670 xa = (Mat_SeqSBAIJ*)xx->A->data; 1671 ya = (Mat_SeqSBAIJ*)yy->A->data; 1672 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1673 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1674 xb = (Mat_SeqBAIJ*)xx->B->data; 1675 yb = (Mat_SeqBAIJ*)yy->B->data; 1676 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1677 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1678 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1679 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1680 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1681 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1682 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 1683 } else { 1684 Mat B; 1685 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1686 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1687 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1688 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 1689 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1690 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1691 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1692 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1693 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1694 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1695 ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 1696 ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1697 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1698 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1699 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1700 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 1701 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1702 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1703 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1704 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1705 } 1706 PetscFunctionReturn(0); 1707 } 1708 1709 #undef __FUNCT__ 1710 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1711 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1712 { 1713 PetscErrorCode ierr; 1714 PetscInt i; 1715 PetscBool flg; 1716 1717 PetscFunctionBegin; 1718 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */ 1719 for (i=0; i<n; i++) { 1720 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1721 if (!flg) { 1722 ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr); 1723 } 1724 } 1725 PetscFunctionReturn(0); 1726 } 1727 1728 #undef __FUNCT__ 1729 #define __FUNCT__ "MatShift_MPISBAIJ" 1730 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a) 1731 { 1732 PetscErrorCode ierr; 1733 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data; 1734 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data; 1735 1736 PetscFunctionBegin; 1737 if (!Y->preallocated) { 1738 ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 1739 } else if (!aij->nz) { 1740 PetscInt nonew = aij->nonew; 1741 ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 1742 aij->nonew = nonew; 1743 } 1744 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1745 PetscFunctionReturn(0); 1746 } 1747 1748 #undef __FUNCT__ 1749 #define __FUNCT__ "MatMissingDiagonal_MPISBAIJ" 1750 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1751 { 1752 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1753 PetscErrorCode ierr; 1754 1755 PetscFunctionBegin; 1756 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 1757 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 1758 if (d) { 1759 PetscInt rstart; 1760 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 1761 *d += rstart/A->rmap->bs; 1762 1763 } 1764 PetscFunctionReturn(0); 1765 } 1766 1767 1768 /* -------------------------------------------------------------------*/ 1769 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1770 MatGetRow_MPISBAIJ, 1771 MatRestoreRow_MPISBAIJ, 1772 MatMult_MPISBAIJ, 1773 /* 4*/ MatMultAdd_MPISBAIJ, 1774 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1775 MatMultAdd_MPISBAIJ, 1776 0, 1777 0, 1778 0, 1779 /* 10*/ 0, 1780 0, 1781 0, 1782 MatSOR_MPISBAIJ, 1783 MatTranspose_MPISBAIJ, 1784 /* 15*/ MatGetInfo_MPISBAIJ, 1785 MatEqual_MPISBAIJ, 1786 MatGetDiagonal_MPISBAIJ, 1787 MatDiagonalScale_MPISBAIJ, 1788 MatNorm_MPISBAIJ, 1789 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1790 MatAssemblyEnd_MPISBAIJ, 1791 MatSetOption_MPISBAIJ, 1792 MatZeroEntries_MPISBAIJ, 1793 /* 24*/ 0, 1794 0, 1795 0, 1796 0, 1797 0, 1798 /* 29*/ MatSetUp_MPISBAIJ, 1799 0, 1800 0, 1801 0, 1802 0, 1803 /* 34*/ MatDuplicate_MPISBAIJ, 1804 0, 1805 0, 1806 0, 1807 0, 1808 /* 39*/ MatAXPY_MPISBAIJ, 1809 MatGetSubMatrices_MPISBAIJ, 1810 MatIncreaseOverlap_MPISBAIJ, 1811 MatGetValues_MPISBAIJ, 1812 MatCopy_MPISBAIJ, 1813 /* 44*/ 0, 1814 MatScale_MPISBAIJ, 1815 MatShift_MPISBAIJ, 1816 0, 1817 0, 1818 /* 49*/ 0, 1819 0, 1820 0, 1821 0, 1822 0, 1823 /* 54*/ 0, 1824 0, 1825 MatSetUnfactored_MPISBAIJ, 1826 0, 1827 MatSetValuesBlocked_MPISBAIJ, 1828 /* 59*/ MatGetSubMatrix_MPISBAIJ, 1829 0, 1830 0, 1831 0, 1832 0, 1833 /* 64*/ 0, 1834 0, 1835 0, 1836 0, 1837 0, 1838 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1839 0, 1840 0, 1841 0, 1842 0, 1843 /* 74*/ 0, 1844 0, 1845 0, 1846 0, 1847 0, 1848 /* 79*/ 0, 1849 0, 1850 0, 1851 0, 1852 MatLoad_MPISBAIJ, 1853 /* 84*/ 0, 1854 0, 1855 0, 1856 0, 1857 0, 1858 /* 89*/ 0, 1859 0, 1860 0, 1861 0, 1862 0, 1863 /* 94*/ 0, 1864 0, 1865 0, 1866 0, 1867 0, 1868 /* 99*/ 0, 1869 0, 1870 0, 1871 0, 1872 0, 1873 /*104*/ 0, 1874 MatRealPart_MPISBAIJ, 1875 MatImaginaryPart_MPISBAIJ, 1876 MatGetRowUpperTriangular_MPISBAIJ, 1877 MatRestoreRowUpperTriangular_MPISBAIJ, 1878 /*109*/ 0, 1879 0, 1880 0, 1881 0, 1882 MatMissingDiagonal_MPISBAIJ, 1883 /*114*/ 0, 1884 0, 1885 0, 1886 0, 1887 0, 1888 /*119*/ 0, 1889 0, 1890 0, 1891 0, 1892 0, 1893 /*124*/ 0, 1894 0, 1895 0, 1896 0, 1897 0, 1898 /*129*/ 0, 1899 0, 1900 0, 1901 0, 1902 0, 1903 /*134*/ 0, 1904 0, 1905 0, 1906 0, 1907 0, 1908 /*139*/ 0, 1909 0, 1910 0, 1911 0, 1912 0, 1913 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 1914 }; 1915 1916 #undef __FUNCT__ 1917 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1918 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1919 { 1920 PetscFunctionBegin; 1921 *a = ((Mat_MPISBAIJ*)A->data)->A; 1922 PetscFunctionReturn(0); 1923 } 1924 1925 #undef __FUNCT__ 1926 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1927 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 1928 { 1929 Mat_MPISBAIJ *b; 1930 PetscErrorCode ierr; 1931 PetscInt i,mbs,Mbs; 1932 1933 PetscFunctionBegin; 1934 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 1935 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1936 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1937 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1938 1939 b = (Mat_MPISBAIJ*)B->data; 1940 mbs = B->rmap->n/bs; 1941 Mbs = B->rmap->N/bs; 1942 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1943 1944 B->rmap->bs = bs; 1945 b->bs2 = bs*bs; 1946 b->mbs = mbs; 1947 b->Mbs = Mbs; 1948 b->nbs = B->cmap->n/bs; 1949 b->Nbs = B->cmap->N/bs; 1950 1951 for (i=0; i<=b->size; i++) { 1952 b->rangebs[i] = B->rmap->range[i]/bs; 1953 } 1954 b->rstartbs = B->rmap->rstart/bs; 1955 b->rendbs = B->rmap->rend/bs; 1956 1957 b->cstartbs = B->cmap->rstart/bs; 1958 b->cendbs = B->cmap->rend/bs; 1959 1960 if (!B->preallocated) { 1961 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1962 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1963 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1964 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 1965 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1966 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1967 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1968 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 1969 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 1970 } 1971 1972 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1973 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1974 1975 B->preallocated = PETSC_TRUE; 1976 PetscFunctionReturn(0); 1977 } 1978 1979 #undef __FUNCT__ 1980 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ" 1981 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1982 { 1983 PetscInt m,rstart,cstart,cend; 1984 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 1985 const PetscInt *JJ =0; 1986 PetscScalar *values=0; 1987 PetscErrorCode ierr; 1988 1989 PetscFunctionBegin; 1990 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 1991 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 1992 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 1993 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1994 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1995 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1996 m = B->rmap->n/bs; 1997 rstart = B->rmap->rstart/bs; 1998 cstart = B->cmap->rstart/bs; 1999 cend = B->cmap->rend/bs; 2000 2001 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2002 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2003 for (i=0; i<m; i++) { 2004 nz = ii[i+1] - ii[i]; 2005 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2006 nz_max = PetscMax(nz_max,nz); 2007 JJ = jj + ii[i]; 2008 for (j=0; j<nz; j++) { 2009 if (*JJ >= cstart) break; 2010 JJ++; 2011 } 2012 d = 0; 2013 for (; j<nz; j++) { 2014 if (*JJ++ >= cend) break; 2015 d++; 2016 } 2017 d_nnz[i] = d; 2018 o_nnz[i] = nz - d; 2019 } 2020 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2021 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2022 2023 values = (PetscScalar*)V; 2024 if (!values) { 2025 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2026 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2027 } 2028 for (i=0; i<m; i++) { 2029 PetscInt row = i + rstart; 2030 PetscInt ncols = ii[i+1] - ii[i]; 2031 const PetscInt *icols = jj + ii[i]; 2032 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2033 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2034 } 2035 2036 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2037 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2038 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2039 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2040 PetscFunctionReturn(0); 2041 } 2042 2043 /*MC 2044 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2045 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2046 the matrix is stored. 2047 2048 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2049 can call MatSetOption(Mat, MAT_HERMITIAN); 2050 2051 Options Database Keys: 2052 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 2053 2054 Level: beginner 2055 2056 .seealso: MatCreateMPISBAIJ 2057 M*/ 2058 2059 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*); 2060 2061 #undef __FUNCT__ 2062 #define __FUNCT__ "MatCreate_MPISBAIJ" 2063 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2064 { 2065 Mat_MPISBAIJ *b; 2066 PetscErrorCode ierr; 2067 PetscBool flg = PETSC_FALSE; 2068 2069 PetscFunctionBegin; 2070 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2071 B->data = (void*)b; 2072 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2073 2074 B->ops->destroy = MatDestroy_MPISBAIJ; 2075 B->ops->view = MatView_MPISBAIJ; 2076 B->assembled = PETSC_FALSE; 2077 B->insertmode = NOT_SET_VALUES; 2078 2079 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2080 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2081 2082 /* build local table of row and column ownerships */ 2083 ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 2084 2085 /* build cache for off array entries formed */ 2086 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2087 2088 b->donotstash = PETSC_FALSE; 2089 b->colmap = NULL; 2090 b->garray = NULL; 2091 b->roworiented = PETSC_TRUE; 2092 2093 /* stuff used in block assembly */ 2094 b->barray = 0; 2095 2096 /* stuff used for matrix vector multiply */ 2097 b->lvec = 0; 2098 b->Mvctx = 0; 2099 b->slvec0 = 0; 2100 b->slvec0b = 0; 2101 b->slvec1 = 0; 2102 b->slvec1a = 0; 2103 b->slvec1b = 0; 2104 b->sMvctx = 0; 2105 2106 /* stuff for MatGetRow() */ 2107 b->rowindices = 0; 2108 b->rowvalues = 0; 2109 b->getrowactive = PETSC_FALSE; 2110 2111 /* hash table stuff */ 2112 b->ht = 0; 2113 b->hd = 0; 2114 b->ht_size = 0; 2115 b->ht_flag = PETSC_FALSE; 2116 b->ht_fact = 0; 2117 b->ht_total_ct = 0; 2118 b->ht_insert_ct = 0; 2119 2120 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 2121 b->ijonly = PETSC_FALSE; 2122 2123 b->in_loc = 0; 2124 b->v_loc = 0; 2125 b->n_loc = 0; 2126 2127 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 2128 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 2129 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 2130 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 2131 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 2132 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr); 2133 #if defined(PETSC_HAVE_ELEMENTAL) 2134 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 2135 #endif 2136 2137 B->symmetric = PETSC_TRUE; 2138 B->structurally_symmetric = PETSC_TRUE; 2139 B->symmetric_set = PETSC_TRUE; 2140 B->structurally_symmetric_set = PETSC_TRUE; 2141 2142 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 2143 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 2144 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 2145 if (flg) { 2146 PetscReal fact = 1.39; 2147 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2148 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 2149 if (fact <= 1.0) fact = 1.39; 2150 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2151 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2152 } 2153 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2154 PetscFunctionReturn(0); 2155 } 2156 2157 /*MC 2158 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2159 2160 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 2161 and MATMPISBAIJ otherwise. 2162 2163 Options Database Keys: 2164 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2165 2166 Level: beginner 2167 2168 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 2169 M*/ 2170 2171 #undef __FUNCT__ 2172 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 2173 /*@C 2174 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2175 the user should preallocate the matrix storage by setting the parameters 2176 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2177 performance can be increased by more than a factor of 50. 2178 2179 Collective on Mat 2180 2181 Input Parameters: 2182 + B - the matrix 2183 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2184 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2185 . d_nz - number of block nonzeros per block row in diagonal portion of local 2186 submatrix (same for all local rows) 2187 . d_nnz - array containing the number of block nonzeros in the various block rows 2188 in the upper triangular and diagonal part of the in diagonal portion of the local 2189 (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 2190 for the diagonal entry and set a value even if it is zero. 2191 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2192 submatrix (same for all local rows). 2193 - o_nnz - array containing the number of nonzeros in the various block rows of the 2194 off-diagonal portion of the local submatrix that is right of the diagonal 2195 (possibly different for each block row) or NULL. 2196 2197 2198 Options Database Keys: 2199 . -mat_no_unroll - uses code that does not unroll the loops in the 2200 block calculations (much slower) 2201 . -mat_block_size - size of the blocks to use 2202 2203 Notes: 2204 2205 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2206 than it must be used on all processors that share the object for that argument. 2207 2208 If the *_nnz parameter is given then the *_nz parameter is ignored 2209 2210 Storage Information: 2211 For a square global matrix we define each processor's diagonal portion 2212 to be its local rows and the corresponding columns (a square submatrix); 2213 each processor's off-diagonal portion encompasses the remainder of the 2214 local matrix (a rectangular submatrix). 2215 2216 The user can specify preallocated storage for the diagonal part of 2217 the local submatrix with either d_nz or d_nnz (not both). Set 2218 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2219 memory allocation. Likewise, specify preallocated storage for the 2220 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2221 2222 You can call MatGetInfo() to get information on how effective the preallocation was; 2223 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2224 You can also run with the option -info and look for messages with the string 2225 malloc in them to see if additional memory allocation was needed. 2226 2227 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2228 the figure below we depict these three local rows and all columns (0-11). 2229 2230 .vb 2231 0 1 2 3 4 5 6 7 8 9 10 11 2232 -------------------------- 2233 row 3 |. . . d d d o o o o o o 2234 row 4 |. . . d d d o o o o o o 2235 row 5 |. . . d d d o o o o o o 2236 -------------------------- 2237 .ve 2238 2239 Thus, any entries in the d locations are stored in the d (diagonal) 2240 submatrix, and any entries in the o locations are stored in the 2241 o (off-diagonal) submatrix. Note that the d matrix is stored in 2242 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2243 2244 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2245 plus the diagonal part of the d matrix, 2246 and o_nz should indicate the number of block nonzeros per row in the o matrix 2247 2248 In general, for PDE problems in which most nonzeros are near the diagonal, 2249 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2250 or you will get TERRIBLE performance; see the users' manual chapter on 2251 matrices. 2252 2253 Level: intermediate 2254 2255 .keywords: matrix, block, aij, compressed row, sparse, parallel 2256 2257 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 2258 @*/ 2259 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2260 { 2261 PetscErrorCode ierr; 2262 2263 PetscFunctionBegin; 2264 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2265 PetscValidType(B,1); 2266 PetscValidLogicalCollectiveInt(B,bs,2); 2267 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 2268 PetscFunctionReturn(0); 2269 } 2270 2271 #undef __FUNCT__ 2272 #define __FUNCT__ "MatCreateSBAIJ" 2273 /*@C 2274 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2275 (block compressed row). For good matrix assembly performance 2276 the user should preallocate the matrix storage by setting the parameters 2277 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2278 performance can be increased by more than a factor of 50. 2279 2280 Collective on MPI_Comm 2281 2282 Input Parameters: 2283 + comm - MPI communicator 2284 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2285 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2286 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2287 This value should be the same as the local size used in creating the 2288 y vector for the matrix-vector product y = Ax. 2289 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2290 This value should be the same as the local size used in creating the 2291 x vector for the matrix-vector product y = Ax. 2292 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2293 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2294 . d_nz - number of block nonzeros per block row in diagonal portion of local 2295 submatrix (same for all local rows) 2296 . d_nnz - array containing the number of block nonzeros in the various block rows 2297 in the upper triangular portion of the in diagonal portion of the local 2298 (possibly different for each block block row) or NULL. 2299 If you plan to factor the matrix you must leave room for the diagonal entry and 2300 set its value even if it is zero. 2301 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2302 submatrix (same for all local rows). 2303 - o_nnz - array containing the number of nonzeros in the various block rows of the 2304 off-diagonal portion of the local submatrix (possibly different for 2305 each block row) or NULL. 2306 2307 Output Parameter: 2308 . A - the matrix 2309 2310 Options Database Keys: 2311 . -mat_no_unroll - uses code that does not unroll the loops in the 2312 block calculations (much slower) 2313 . -mat_block_size - size of the blocks to use 2314 . -mat_mpi - use the parallel matrix data structures even on one processor 2315 (defaults to using SeqBAIJ format on one processor) 2316 2317 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2318 MatXXXXSetPreallocation() paradgm instead of this routine directly. 2319 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2320 2321 Notes: 2322 The number of rows and columns must be divisible by blocksize. 2323 This matrix type does not support complex Hermitian operation. 2324 2325 The user MUST specify either the local or global matrix dimensions 2326 (possibly both). 2327 2328 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2329 than it must be used on all processors that share the object for that argument. 2330 2331 If the *_nnz parameter is given then the *_nz parameter is ignored 2332 2333 Storage Information: 2334 For a square global matrix we define each processor's diagonal portion 2335 to be its local rows and the corresponding columns (a square submatrix); 2336 each processor's off-diagonal portion encompasses the remainder of the 2337 local matrix (a rectangular submatrix). 2338 2339 The user can specify preallocated storage for the diagonal part of 2340 the local submatrix with either d_nz or d_nnz (not both). Set 2341 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2342 memory allocation. Likewise, specify preallocated storage for the 2343 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2344 2345 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2346 the figure below we depict these three local rows and all columns (0-11). 2347 2348 .vb 2349 0 1 2 3 4 5 6 7 8 9 10 11 2350 -------------------------- 2351 row 3 |. . . d d d o o o o o o 2352 row 4 |. . . d d d o o o o o o 2353 row 5 |. . . d d d o o o o o o 2354 -------------------------- 2355 .ve 2356 2357 Thus, any entries in the d locations are stored in the d (diagonal) 2358 submatrix, and any entries in the o locations are stored in the 2359 o (off-diagonal) submatrix. Note that the d matrix is stored in 2360 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2361 2362 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2363 plus the diagonal part of the d matrix, 2364 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2365 In general, for PDE problems in which most nonzeros are near the diagonal, 2366 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2367 or you will get TERRIBLE performance; see the users' manual chapter on 2368 matrices. 2369 2370 Level: intermediate 2371 2372 .keywords: matrix, block, aij, compressed row, sparse, parallel 2373 2374 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2375 @*/ 2376 2377 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2378 { 2379 PetscErrorCode ierr; 2380 PetscMPIInt size; 2381 2382 PetscFunctionBegin; 2383 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2384 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2385 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2386 if (size > 1) { 2387 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2388 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2389 } else { 2390 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2391 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2392 } 2393 PetscFunctionReturn(0); 2394 } 2395 2396 2397 #undef __FUNCT__ 2398 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2399 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2400 { 2401 Mat mat; 2402 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2403 PetscErrorCode ierr; 2404 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2405 PetscScalar *array; 2406 2407 PetscFunctionBegin; 2408 *newmat = 0; 2409 2410 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2411 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2412 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2413 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2414 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2415 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2416 2417 mat->factortype = matin->factortype; 2418 mat->preallocated = PETSC_TRUE; 2419 mat->assembled = PETSC_TRUE; 2420 mat->insertmode = NOT_SET_VALUES; 2421 2422 a = (Mat_MPISBAIJ*)mat->data; 2423 a->bs2 = oldmat->bs2; 2424 a->mbs = oldmat->mbs; 2425 a->nbs = oldmat->nbs; 2426 a->Mbs = oldmat->Mbs; 2427 a->Nbs = oldmat->Nbs; 2428 2429 2430 a->size = oldmat->size; 2431 a->rank = oldmat->rank; 2432 a->donotstash = oldmat->donotstash; 2433 a->roworiented = oldmat->roworiented; 2434 a->rowindices = 0; 2435 a->rowvalues = 0; 2436 a->getrowactive = PETSC_FALSE; 2437 a->barray = 0; 2438 a->rstartbs = oldmat->rstartbs; 2439 a->rendbs = oldmat->rendbs; 2440 a->cstartbs = oldmat->cstartbs; 2441 a->cendbs = oldmat->cendbs; 2442 2443 /* hash table stuff */ 2444 a->ht = 0; 2445 a->hd = 0; 2446 a->ht_size = 0; 2447 a->ht_flag = oldmat->ht_flag; 2448 a->ht_fact = oldmat->ht_fact; 2449 a->ht_total_ct = 0; 2450 a->ht_insert_ct = 0; 2451 2452 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2453 if (oldmat->colmap) { 2454 #if defined(PETSC_USE_CTABLE) 2455 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2456 #else 2457 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 2458 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2459 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2460 #endif 2461 } else a->colmap = 0; 2462 2463 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2464 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2465 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2466 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2467 } else a->garray = 0; 2468 2469 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2470 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2471 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2472 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2473 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2474 2475 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2476 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2477 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2478 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2479 2480 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2481 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2482 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2483 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2484 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2485 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2486 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2487 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2488 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2489 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2490 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 2491 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 2492 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2493 2494 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2495 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2496 a->sMvctx = oldmat->sMvctx; 2497 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 2498 2499 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2500 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2501 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2502 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2503 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2504 *newmat = mat; 2505 PetscFunctionReturn(0); 2506 } 2507 2508 #undef __FUNCT__ 2509 #define __FUNCT__ "MatLoad_MPISBAIJ" 2510 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2511 { 2512 PetscErrorCode ierr; 2513 PetscInt i,nz,j,rstart,rend; 2514 PetscScalar *vals,*buf; 2515 MPI_Comm comm; 2516 MPI_Status status; 2517 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2518 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2519 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2520 PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows; 2521 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2522 PetscInt dcount,kmax,k,nzcount,tmp; 2523 int fd; 2524 2525 PetscFunctionBegin; 2526 /* force binary viewer to load .info file if it has not yet done so */ 2527 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2528 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2529 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2530 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2531 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2532 if (bs < 0) bs = 1; 2533 2534 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2535 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2536 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2537 if (!rank) { 2538 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2539 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2540 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2541 } 2542 2543 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2544 M = header[1]; 2545 N = header[2]; 2546 2547 /* If global sizes are set, check if they are consistent with that given in the file */ 2548 if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M); 2549 if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N); 2550 2551 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2552 2553 /* 2554 This code adds extra rows to make sure the number of rows is 2555 divisible by the blocksize 2556 */ 2557 Mbs = M/bs; 2558 extra_rows = bs - M + bs*(Mbs); 2559 if (extra_rows == bs) extra_rows = 0; 2560 else Mbs++; 2561 if (extra_rows &&!rank) { 2562 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2563 } 2564 2565 /* determine ownership of all rows */ 2566 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2567 mbs = Mbs/size + ((Mbs % size) > rank); 2568 m = mbs*bs; 2569 } else { /* User Set */ 2570 m = newmat->rmap->n; 2571 mbs = m/bs; 2572 } 2573 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 2574 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2575 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2576 rowners[0] = 0; 2577 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2578 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2579 rstart = rowners[rank]; 2580 rend = rowners[rank+1]; 2581 2582 /* distribute row lengths to all processors */ 2583 ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 2584 if (!rank) { 2585 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 2586 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2587 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2588 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 2589 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2590 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2591 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2592 } else { 2593 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2594 } 2595 2596 if (!rank) { /* procs[0] */ 2597 /* calculate the number of nonzeros on each processor */ 2598 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 2599 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2600 for (i=0; i<size; i++) { 2601 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2602 procsnz[i] += rowlengths[j]; 2603 } 2604 } 2605 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2606 2607 /* determine max buffer needed and allocate it */ 2608 maxnz = 0; 2609 for (i=0; i<size; i++) { 2610 maxnz = PetscMax(maxnz,procsnz[i]); 2611 } 2612 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2613 2614 /* read in my part of the matrix column indices */ 2615 nz = procsnz[0]; 2616 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2617 mycols = ibuf; 2618 if (size == 1) nz -= extra_rows; 2619 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2620 if (size == 1) { 2621 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 2622 } 2623 2624 /* read in every ones (except the last) and ship off */ 2625 for (i=1; i<size-1; i++) { 2626 nz = procsnz[i]; 2627 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2628 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2629 } 2630 /* read in the stuff for the last proc */ 2631 if (size != 1) { 2632 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2633 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2634 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2635 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2636 } 2637 ierr = PetscFree(cols);CHKERRQ(ierr); 2638 } else { /* procs[i], i>0 */ 2639 /* determine buffer space needed for message */ 2640 nz = 0; 2641 for (i=0; i<m; i++) nz += locrowlens[i]; 2642 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2643 mycols = ibuf; 2644 /* receive message of column indices*/ 2645 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2646 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2647 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2648 } 2649 2650 /* loop over local rows, determining number of off diagonal entries */ 2651 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 2652 ierr = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 2653 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2654 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2655 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2656 rowcount = 0; 2657 nzcount = 0; 2658 for (i=0; i<mbs; i++) { 2659 dcount = 0; 2660 odcount = 0; 2661 for (j=0; j<bs; j++) { 2662 kmax = locrowlens[rowcount]; 2663 for (k=0; k<kmax; k++) { 2664 tmp = mycols[nzcount++]/bs; /* block col. index */ 2665 if (!mask[tmp]) { 2666 mask[tmp] = 1; 2667 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2668 else masked1[dcount++] = tmp; /* entry in diag portion */ 2669 } 2670 } 2671 rowcount++; 2672 } 2673 2674 dlens[i] = dcount; /* d_nzz[i] */ 2675 odlens[i] = odcount; /* o_nzz[i] */ 2676 2677 /* zero out the mask elements we set */ 2678 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2679 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2680 } 2681 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2682 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2683 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2684 2685 if (!rank) { 2686 ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 2687 /* read in my part of the matrix numerical values */ 2688 nz = procsnz[0]; 2689 vals = buf; 2690 mycols = ibuf; 2691 if (size == 1) nz -= extra_rows; 2692 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2693 if (size == 1) { 2694 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 2695 } 2696 2697 /* insert into matrix */ 2698 jj = rstart*bs; 2699 for (i=0; i<m; i++) { 2700 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2701 mycols += locrowlens[i]; 2702 vals += locrowlens[i]; 2703 jj++; 2704 } 2705 2706 /* read in other processors (except the last one) and ship out */ 2707 for (i=1; i<size-1; i++) { 2708 nz = procsnz[i]; 2709 vals = buf; 2710 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2711 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2712 } 2713 /* the last proc */ 2714 if (size != 1) { 2715 nz = procsnz[i] - extra_rows; 2716 vals = buf; 2717 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2718 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2719 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2720 } 2721 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2722 2723 } else { 2724 /* receive numeric values */ 2725 ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 2726 2727 /* receive message of values*/ 2728 vals = buf; 2729 mycols = ibuf; 2730 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2731 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2732 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2733 2734 /* insert into matrix */ 2735 jj = rstart*bs; 2736 for (i=0; i<m; i++) { 2737 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2738 mycols += locrowlens[i]; 2739 vals += locrowlens[i]; 2740 jj++; 2741 } 2742 } 2743 2744 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2745 ierr = PetscFree(buf);CHKERRQ(ierr); 2746 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2747 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2748 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2749 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2750 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2751 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2752 PetscFunctionReturn(0); 2753 } 2754 2755 #undef __FUNCT__ 2756 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2757 /*XXXXX@ 2758 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2759 2760 Input Parameters: 2761 . mat - the matrix 2762 . fact - factor 2763 2764 Not Collective on Mat, each process can have a different hash factor 2765 2766 Level: advanced 2767 2768 Notes: 2769 This can also be set by the command line option: -mat_use_hash_table fact 2770 2771 .keywords: matrix, hashtable, factor, HT 2772 2773 .seealso: MatSetOption() 2774 @XXXXX*/ 2775 2776 2777 #undef __FUNCT__ 2778 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2779 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2780 { 2781 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2782 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2783 PetscReal atmp; 2784 PetscReal *work,*svalues,*rvalues; 2785 PetscErrorCode ierr; 2786 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2787 PetscMPIInt rank,size; 2788 PetscInt *rowners_bs,dest,count,source; 2789 PetscScalar *va; 2790 MatScalar *ba; 2791 MPI_Status stat; 2792 2793 PetscFunctionBegin; 2794 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2795 ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 2796 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2797 2798 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2799 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2800 2801 bs = A->rmap->bs; 2802 mbs = a->mbs; 2803 Mbs = a->Mbs; 2804 ba = b->a; 2805 bi = b->i; 2806 bj = b->j; 2807 2808 /* find ownerships */ 2809 rowners_bs = A->rmap->range; 2810 2811 /* each proc creates an array to be distributed */ 2812 ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr); 2813 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2814 2815 /* row_max for B */ 2816 if (rank != size-1) { 2817 for (i=0; i<mbs; i++) { 2818 ncols = bi[1] - bi[0]; bi++; 2819 brow = bs*i; 2820 for (j=0; j<ncols; j++) { 2821 bcol = bs*(*bj); 2822 for (kcol=0; kcol<bs; kcol++) { 2823 col = bcol + kcol; /* local col index */ 2824 col += rowners_bs[rank+1]; /* global col index */ 2825 for (krow=0; krow<bs; krow++) { 2826 atmp = PetscAbsScalar(*ba); ba++; 2827 row = brow + krow; /* local row index */ 2828 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2829 if (work[col] < atmp) work[col] = atmp; 2830 } 2831 } 2832 bj++; 2833 } 2834 } 2835 2836 /* send values to its owners */ 2837 for (dest=rank+1; dest<size; dest++) { 2838 svalues = work + rowners_bs[dest]; 2839 count = rowners_bs[dest+1]-rowners_bs[dest]; 2840 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2841 } 2842 } 2843 2844 /* receive values */ 2845 if (rank) { 2846 rvalues = work; 2847 count = rowners_bs[rank+1]-rowners_bs[rank]; 2848 for (source=0; source<rank; source++) { 2849 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 2850 /* process values */ 2851 for (i=0; i<count; i++) { 2852 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2853 } 2854 } 2855 } 2856 2857 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2858 ierr = PetscFree(work);CHKERRQ(ierr); 2859 PetscFunctionReturn(0); 2860 } 2861 2862 #undef __FUNCT__ 2863 #define __FUNCT__ "MatSOR_MPISBAIJ" 2864 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2865 { 2866 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2867 PetscErrorCode ierr; 2868 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2869 PetscScalar *x,*ptr,*from; 2870 Vec bb1; 2871 const PetscScalar *b; 2872 2873 PetscFunctionBegin; 2874 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2875 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2876 2877 if (flag == SOR_APPLY_UPPER) { 2878 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2879 PetscFunctionReturn(0); 2880 } 2881 2882 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2883 if (flag & SOR_ZERO_INITIAL_GUESS) { 2884 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2885 its--; 2886 } 2887 2888 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2889 while (its--) { 2890 2891 /* lower triangular part: slvec0b = - B^T*xx */ 2892 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2893 2894 /* copy xx into slvec0a */ 2895 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2896 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2897 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2898 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2899 2900 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2901 2902 /* copy bb into slvec1a */ 2903 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2904 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2905 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2906 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2907 2908 /* set slvec1b = 0 */ 2909 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2910 2911 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2912 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2913 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2914 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2915 2916 /* upper triangular part: bb1 = bb1 - B*x */ 2917 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2918 2919 /* local diagonal sweep */ 2920 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2921 } 2922 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2923 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2924 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2925 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2926 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2927 } else if (flag & SOR_EISENSTAT) { 2928 Vec xx1; 2929 PetscBool hasop; 2930 const PetscScalar *diag; 2931 PetscScalar *sl,scale = (omega - 2.0)/omega; 2932 PetscInt i,n; 2933 2934 if (!mat->xx1) { 2935 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2936 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2937 } 2938 xx1 = mat->xx1; 2939 bb1 = mat->bb1; 2940 2941 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2942 2943 if (!mat->diag) { 2944 /* this is wrong for same matrix with new nonzero values */ 2945 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 2946 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2947 } 2948 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2949 2950 if (hasop) { 2951 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2952 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2953 } else { 2954 /* 2955 These two lines are replaced by code that may be a bit faster for a good compiler 2956 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2957 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2958 */ 2959 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2960 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2961 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2962 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2963 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2964 if (omega == 1.0) { 2965 for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 2966 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2967 } else { 2968 for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 2969 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2970 } 2971 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2972 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2973 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2974 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2975 } 2976 2977 /* multiply off-diagonal portion of matrix */ 2978 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2979 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2980 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2981 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2982 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2983 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2984 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2985 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2986 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2987 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2988 2989 /* local sweep */ 2990 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2991 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2992 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2993 PetscFunctionReturn(0); 2994 } 2995 2996 #undef __FUNCT__ 2997 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2998 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2999 { 3000 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 3001 PetscErrorCode ierr; 3002 Vec lvec1,bb1; 3003 3004 PetscFunctionBegin; 3005 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 3006 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 3007 3008 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 3009 if (flag & SOR_ZERO_INITIAL_GUESS) { 3010 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 3011 its--; 3012 } 3013 3014 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 3015 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 3016 while (its--) { 3017 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3018 3019 /* lower diagonal part: bb1 = bb - B^T*xx */ 3020 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 3021 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 3022 3023 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3024 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 3025 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3026 3027 /* upper diagonal part: bb1 = bb1 - B*x */ 3028 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 3029 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 3030 3031 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3032 3033 /* diagonal sweep */ 3034 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 3035 } 3036 ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 3037 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 3038 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 3039 PetscFunctionReturn(0); 3040 } 3041 3042 #undef __FUNCT__ 3043 #define __FUNCT__ "MatCreateMPISBAIJWithArrays" 3044 /*@ 3045 MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard 3046 CSR format the local rows. 3047 3048 Collective on MPI_Comm 3049 3050 Input Parameters: 3051 + comm - MPI communicator 3052 . bs - the block size, only a block size of 1 is supported 3053 . m - number of local rows (Cannot be PETSC_DECIDE) 3054 . n - This value should be the same as the local size used in creating the 3055 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3056 calculated if N is given) For square matrices n is almost always m. 3057 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3058 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3059 . i - row indices 3060 . j - column indices 3061 - a - matrix values 3062 3063 Output Parameter: 3064 . mat - the matrix 3065 3066 Level: intermediate 3067 3068 Notes: 3069 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3070 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3071 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3072 3073 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3074 3075 .keywords: matrix, aij, compressed row, sparse, parallel 3076 3077 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3078 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3079 @*/ 3080 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3081 { 3082 PetscErrorCode ierr; 3083 3084 3085 PetscFunctionBegin; 3086 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3087 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3088 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3089 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3090 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 3091 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 3092 PetscFunctionReturn(0); 3093 } 3094 3095 3096 #undef __FUNCT__ 3097 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR" 3098 /*@C 3099 MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 3100 (the default parallel PETSc format). 3101 3102 Collective on MPI_Comm 3103 3104 Input Parameters: 3105 + B - the matrix 3106 . bs - the block size 3107 . i - the indices into j for the start of each local row (starts with zero) 3108 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3109 - v - optional values in the matrix 3110 3111 Level: developer 3112 3113 .keywords: matrix, aij, compressed row, sparse, parallel 3114 3115 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 3116 @*/ 3117 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3118 { 3119 PetscErrorCode ierr; 3120 3121 PetscFunctionBegin; 3122 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3123 PetscFunctionReturn(0); 3124 } 3125 3126 #undef __FUNCT__ 3127 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ" 3128 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3129 { 3130 PetscErrorCode ierr; 3131 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 3132 PetscInt *indx; 3133 PetscScalar *values; 3134 3135 PetscFunctionBegin; 3136 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3137 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3138 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data; 3139 PetscInt *dnz,*onz,sum,bs,cbs,mbs,Nbs; 3140 PetscInt *bindx,rmax=a->rmax,j; 3141 3142 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3143 mbs = m/bs; Nbs = N/cbs; 3144 if (n == PETSC_DECIDE) { 3145 ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr); 3146 } 3147 /* Check sum(n) = Nbs */ 3148 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3149 if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs); 3150 3151 ierr = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3152 rstart -= mbs; 3153 3154 ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 3155 ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr); 3156 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 3157 for (i=0; i<mbs; i++) { 3158 ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 3159 nnz = nnz/bs; 3160 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 3161 ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 3162 ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 3163 } 3164 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 3165 ierr = PetscFree(bindx);CHKERRQ(ierr); 3166 3167 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3168 ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3169 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3170 ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr); 3171 ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 3172 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3173 } 3174 3175 /* numeric phase */ 3176 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3177 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3178 3179 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 3180 for (i=0; i<m; i++) { 3181 ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3182 Ii = i + rstart; 3183 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3184 ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3185 } 3186 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 3187 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3188 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3189 PetscFunctionReturn(0); 3190 } 3191