#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif

#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
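/*
   Typical use of the store/retrieve pair above (a minimal sketch; it assumes a
   fully assembled matrix "mat" whose nonzero pattern has been frozen, which
   MatStoreValues() requires):

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);     .. save the current numerical values
     (modify the values of mat, e.g. inside a nonlinear iteration)
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);  .. restore the saved values

   Both calls simply forward to the sequential A (diagonal) and B (off-diagonal)
   blocks, since every value of the parallel matrix lives in one of those two parts.
*/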
#define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs; \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    A->nonzerostate++; \
a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs; \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    B->nonzerostate++; \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
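/*
   The macros above address a single scalar inside a bs x bs block: blocks are
   stored column-major, so entry (ridx,cidx) of block _i lives at

     ap[bs2*_i + bs*cidx + ridx]

   For example, with bs = 2 (bs2 = 4) the block

     [a b]
     [c d]

   is laid out in memory as  a c b d,  and entry (0,1) = b is found at offset
   bs*1 + 0 = 2 within the block.
*/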
/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required in the macros */
  Mat          A = baij->A;
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa = a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba = b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else { /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ*)(B)->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else { /* off processor entry */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
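/*
   Sketch of how a caller drives the routine above through the public API
   (hypothetical values; im[]/in[] are global scalar indices).  Only the upper
   triangle needs to be supplied:

     PetscInt    row = 0, cols[2] = {0, 3};
     PetscScalar vals[2] = {4.0, -1.0};
     ierr = MatSetValues(mat,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);

   An entry with row > col (by block) is silently skipped when
   MAT_IGNORE_LOWER_TRIANGULAR is set, and generates the error raised above
   otherwise.
*/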
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ_Inlined"
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp    = aj + ai[row];
  ap    = aa + bs2*ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;

  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ = *value++;
            }
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero block at block indices (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined"
/*
    This routine is exactly duplicated in mpibaij.c
*/
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2*ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  low   = 0;
  high  = nrow;
  value = v;
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero block at global block indices (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
/*
    This routine could be optimized by removing the need for the block copy below and passing stride information
  to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
*/
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray = baij->barray;
  PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
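/*
   A minimal sketch of calling the blocked routine above through the public API
   (hypothetical values; the row/column arguments are BLOCK indices, and v
   holds bs2 scalars per block, here for bs = 2):

     PetscInt    brow = 0, bcol = 1;
     PetscScalar blk[4] = {1.0, 2.0, 3.0, 4.0};
     ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,blk,INSERT_VALUES);CHKERRQ(ierr);

   With the default row-oriented layout, blk is read as the 2 x 2 block
   [1 2; 3 4]; after MatSetOption(mat,MAT_ROW_ORIENTED,PETSC_FALSE) it would be
   read column by column instead.
*/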
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
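/*
   Retrieval mirrors insertion (a sketch with hypothetical indices; only rows
   owned by this process may be queried, as the error above enforces):

     PetscInt    row = 0, cols[2] = {0, 3};
     PetscScalar vals[2];
     ierr = MatGetValues(mat,1,&row,2,cols,vals);CHKERRQ(ierr);

   Off-diagonal entries whose block column is not present in this process's
   B part are returned as 0.0 (see the colmap/garray test above).
*/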
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
      ierr    = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* square of norm(A) */
      ierr    = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* square of norm(B) */
      ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
      ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
      ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
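/*
   The Frobenius reduction above exploits the symmetric storage: each local A
   part already accounts for the full (symmetric) diagonal block, while each
   entry of the local B part represents both itself and its transpose, so

     ||M||_F^2 = sum over ranks of ( ||A_local||_F^2 + 2*||B_local||_F^2 )

   which is exactly  *norm = PetscSqrtReal(sum[0] + 2*sum[1])  after the
   allreduce of the two locally squared norms.
*/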
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a    = (Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;
  MatScalar      *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the row-oriented flags to PETSC_FALSE, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
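/*
   The standard calling sequence that drives the two routines above (a sketch):

     ierr = MatSetValues(mat,...);CHKERRQ(ierr);   .. possibly for off-process rows
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Begin ships the stashed off-process entries; End drains both stashes through
   MatSetValues_MPISBAIJ()/MatSetValuesBlocked_MPISBAIJ() and then assembles
   the local A and B blocks.
*/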
extern PetscErrorCode MatView_SeqSBAIJ(Mat,PetscViewer);
extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
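/*
   How the single-communication product above works: with the symmetric
   storage M = D + U + U^T (D and the upper part of the diagonal block in A,
   the off-diagonal upper part in B), the product decomposes as

     y = A*x + B*x_ghost + B^T*x

   B^T*x is computed locally into slvec0b, x itself is copied into slvec0, and
   one combined scatter-add (sMvctx) both sends the B^T contributions to their
   owning processes and gathers the ghost values of x needed for B*x_ghost, so
   each multiply needs only one round of communication (unlike the 2comm
   variant, which scatters twice).
*/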
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from,zero=0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart; /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}
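/*
   Sketch of the calling protocol for the row routines above (only locally
   owned rows may be queried; for sbaij the upper-triangular accessors must
   bracket the calls):

     const PetscInt    *cols;
     const PetscScalar *vals;
     PetscInt          ncols;
     ierr = MatGetRowUpperTriangular(mat);CHKERRQ(ierr);
     ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     (use cols[0..ncols-1] and vals[0..ncols-1])
     ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     ierr = MatRestoreRowUpperTriangular(mat);CHKERRQ(ierr);
*/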
#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPISBAIJ"
PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Check if isrow is a subset of iscol_local, called by MatGetSubMatrix_MPISBAIJ()
   Input: isrow       - distributed (parallel)
          iscol_local - locally owned (sequential)
*/
#undef __FUNCT__
#define __FUNCT__ "ISEqual_private"
PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg)
{
  PetscErrorCode ierr;
  PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
  const PetscInt *ptr1,*ptr2;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
  if (sz1 > sz2) {
    *flg = PETSC_FALSE;
    PetscFunctionReturn(0);
  }

  ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);

  ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
  ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
  ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
  ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);

  nmatch = 0;
  k      = 0;
  for (i=0; i<sz1; i++) {
    for (j=k; j<sz2; j++) {
      if (a1[i] == a2[j]) {
        k = j; nmatch++;
        break;
      }
    }
  }
  ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
  ierr = PetscFree(a1);CHKERRQ(ierr);
  ierr = PetscFree(a2);CHKERRQ(ierr);
  if (nmatch < sz1) {
    *flg = PETSC_FALSE;
  } else {
    *flg = PETSC_TRUE;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPISBAIJ"
PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;
  PetscBool      isequal;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
    if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
  }

  /* now call MatGetSubMatrix_MPIBAIJ_Private() */
  ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
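/*
   Usage sketch for the extraction above: because the result must remain
   symmetric, the row and column index sets have to select the same global
   indices (checked via ISEqual_private()):

     IS  is;            (the same IS is passed for rows and columns)
     Mat sub;
     ierr = MatGetSubMatrix(mat,is,is,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);

   The gathered column IS is cached on the submatrix under the name
   "ISAllGather" so that a later MAT_REUSE_MATRIX call can find it again.
*/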
__FUNCT__ "MatZeroEntries_MPISBAIJ" 1398 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1399 { 1400 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1401 PetscErrorCode ierr; 1402 1403 PetscFunctionBegin; 1404 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1405 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1406 PetscFunctionReturn(0); 1407 } 1408 1409 #undef __FUNCT__ 1410 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1411 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1412 { 1413 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1414 Mat A = a->A,B = a->B; 1415 PetscErrorCode ierr; 1416 PetscReal isend[5],irecv[5]; 1417 1418 PetscFunctionBegin; 1419 info->block_size = (PetscReal)matin->rmap->bs; 1420 1421 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1422 1423 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1424 isend[3] = info->memory; isend[4] = info->mallocs; 1425 1426 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1427 1428 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1429 isend[3] += info->memory; isend[4] += info->mallocs; 1430 if (flag == MAT_LOCAL) { 1431 info->nz_used = isend[0]; 1432 info->nz_allocated = isend[1]; 1433 info->nz_unneeded = isend[2]; 1434 info->memory = isend[3]; 1435 info->mallocs = isend[4]; 1436 } else if (flag == MAT_GLOBAL_MAX) { 1437 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1438 1439 info->nz_used = irecv[0]; 1440 info->nz_allocated = irecv[1]; 1441 info->nz_unneeded = irecv[2]; 1442 info->memory = irecv[3]; 1443 info->mallocs = irecv[4]; 1444 } else if (flag == MAT_GLOBAL_SUM) { 1445 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1446 1447 info->nz_used = irecv[0]; 1448 info->nz_allocated = irecv[1]; 1449 info->nz_unneeded = irecv[2]; 1450 info->memory = irecv[3]; 1451 info->mallocs = irecv[4]; 1452 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1453 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1454 info->fill_ratio_needed = 0; 1455 info->factor_mallocs = 0; 1456 PetscFunctionReturn(0); 1457 } 1458 1459 #undef __FUNCT__ 1460 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1461 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1462 { 1463 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1464 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1465 PetscErrorCode ierr; 1466 1467 PetscFunctionBegin; 1468 switch (op) { 1469 case MAT_NEW_NONZERO_LOCATIONS: 1470 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1471 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1472 case MAT_KEEP_NONZERO_PATTERN: 1473 case MAT_NEW_NONZERO_LOCATION_ERR: 1474 MatCheckPreallocated(A,1); 1475 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1476 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1477 break; 1478 case MAT_ROW_ORIENTED: 1479 MatCheckPreallocated(A,1); 1480 a->roworiented = flg; 1481 1482 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1483 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1484 break; 1485 case MAT_NEW_DIAGONALS: 1486 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1487 break; 1488 case MAT_IGNORE_OFF_PROC_ENTRIES: 1489 a->donotstash = flg; 1490 break; 1491 case MAT_USE_HASH_TABLE: 1492 a->ht_flag = flg; 1493 break; 1494 case MAT_HERMITIAN: 1495 MatCheckPreallocated(A,1); 1496 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call 
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    MatCheckPreallocated(A,1);
    if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);

    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || *B != A) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a = baij->A,b = baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscBool      flg;

  PetscFunctionBegin;
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D %D must be the same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv != n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
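/*
   A short sketch of the options most specific to this format (set before
   inserting values):

     ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(mat,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);

   The first makes MatSetValues() silently drop entries below the block
   diagonal instead of raising an error; the second disables the off-process
   stash, so each process may only set rows it owns.
*/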
#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPISBAIJ"
PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPISBAIJ"
PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPISBAIJ"
PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPISBAIJ"
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *xx = (Mat_MPISBAIJ*)X->data,*yy = (Mat_MPISBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqSBAIJ   *xa,*ya;
  Mat_SeqBAIJ    *xb,*yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    xa   = (Mat_SeqSBAIJ*)xx->A->data;
    ya   = (Mat_SeqSBAIJ*)yy->A->data;
    ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
    xb   = (Mat_SeqBAIJ*)xx->B->data;
    yb   = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X are a subset of Y's */
    ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
    ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
    ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
{
  PetscErrorCode ierr;
  PetscInt       i;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */
  for (i=0; i<n; i++) {
    ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
    if (!flg) {
      Mat Bseq = *B[i];
      /* Bseq is a non-symmetric SBAIJ matrix, only used internally by PETSc.
         Zero some ops to avoid invalid use */
      ierr = MatSetOption(Bseq,MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr);
      Bseq->ops->mult                   = 0;
      Bseq->ops->multadd                = 0;
      Bseq->ops->multtranspose          = 0;
      Bseq->ops->multtransposeadd       = 0;
      Bseq->ops->lufactor               = 0;
      Bseq->ops->choleskyfactor         = 0;
      Bseq->ops->lufactorsymbolic       = 0;
      Bseq->ops->choleskyfactorsymbolic = 0;
      Bseq->ops->getinertia             = 0;
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatShift_MPISBAIJ"
PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *maij = (Mat_MPISBAIJ*)Y->data;
  Mat_SeqSBAIJ   *aij  = (Mat_SeqSBAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    PetscInt nonew = aij->nonew;
    ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
    aij->nonew = nonew;
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMissingDiagonal_MPISBAIJ"
PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
  ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
  if (d) {
    PetscInt rstart;
    ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
    *d  += rstart/A->rmap->bs;
  }
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
                                       MatGetRow_MPISBAIJ,
                                       MatRestoreRow_MPISBAIJ,
                                       MatMult_MPISBAIJ,
                               /* 4*/  MatMultAdd_MPISBAIJ,
                                       MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
                                       MatMultAdd_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                               /* 10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPISBAIJ,
                                       MatTranspose_MPISBAIJ,
                               /* 15*/ MatGetInfo_MPISBAIJ,
                                       MatEqual_MPISBAIJ,
                                       MatGetDiagonal_MPISBAIJ,
                                       MatDiagonalScale_MPISBAIJ,
                                       MatNorm_MPISBAIJ,
                               /* 20*/ MatAssemblyBegin_MPISBAIJ,
                                       MatAssemblyEnd_MPISBAIJ,
                                       MatSetOption_MPISBAIJ,
                                       MatZeroEntries_MPISBAIJ,
                               /* 24*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 29*/ MatSetUp_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 34*/ MatDuplicate_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 39*/ MatAXPY_MPISBAIJ,
                                       MatGetSubMatrices_MPISBAIJ,
                                       MatIncreaseOverlap_MPISBAIJ,
                                       MatGetValues_MPISBAIJ,
                                       MatCopy_MPISBAIJ,
                               /* 44*/ 0,
                                       MatScale_MPISBAIJ,
                                       MatShift_MPISBAIJ,
                                       0,
                                       0,
                               /* 49*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 54*/ 0,
                                       0,
                                       MatSetUnfactored_MPISBAIJ,
                                       0,
                                       MatSetValuesBlocked_MPISBAIJ,
                               /* 59*/ MatGetSubMatrix_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 64*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 74*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 79*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatLoad_MPISBAIJ,
                               /* 84*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 89*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 94*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 99*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*104*/ 0,
                                       MatRealPart_MPISBAIJ,
                                       MatImaginaryPart_MPISBAIJ,
                                       MatGetRowUpperTriangular_MPISBAIJ,
                                       MatRestoreRowUpperTriangular_MPISBAIJ,
                               /*109*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatMissingDiagonal_MPISBAIJ,
                               /*114*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*119*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*124*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*129*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*134*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*139*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
};

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPISBAIJ*)A->data)->A;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscInt       i,mbs,Mbs;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);

  b   = (Mat_MPISBAIJ*)B->data;
  mbs = B->rmap->n/bs;
  Mbs = B->rmap->N/bs;
  if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);

  B->rmap->bs = bs;
  b->bs2      = bs*bs;
  b->mbs      = mbs;
  b->Mbs      = Mbs;
  b->nbs      = B->cmap->n/bs;
  b->Nbs      = B->cmap->N/bs;

  for (i=0; i<=b->size; i++) {
    b->rangebs[i] = B->rmap->range[i]/bs;
  }
  b->rstartbs = B->rmap->rstart/bs;
  b->rendbs   = B->rmap->rend/bs;

  b->cstartbs = B->cmap->rstart/bs;
  b->cendbs   = B->cmap->rend/bs;

  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
    ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
  }

  ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);

  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cstart,cend;
  PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
  const PetscInt *JJ    =0;
  PetscScalar    *values=0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
  ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
  m      = B->rmap->n/bs;
  rstart = B->rmap->rstart/bs;
  cstart = B->cmap->rstart/bs;
  cend   = B->cmap->rend/bs;

  if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    nz_max = PetscMax(nz_max,nz);
    JJ     = jj + ii[i];
    for (j=0; j<nz; j++) {
      if (*JJ >= cstart) break;
      JJ++;
    }
    d = 0;
    for (; j<nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d;
  }
  ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  values = (PetscScalar*)V;
  if (!values) {
    ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row    = i + rstart;
    PetscInt          ncols  = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
    ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
   based on the block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
   the matrix is stored.

   For complex numbers this matrix is by default symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric
   call MatSetOption(mat, MAT_HERMITIAN, PETSC_TRUE).

   Options Database Keys:
. -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateSBAIJ()
M*/

PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);

#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPISBAIJ"
PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->assembled    = PETSC_FALSE;
  B->insertmode   = NOT_SET_VALUES;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec    = 0;
  b->Mvctx   = 0;
  b->slvec0  = 0;
  b->slvec0b = 0;
  b->slvec1  = 0;
  b->slvec1a = 0;
  b->slvec1b = 0;
  b->sMvctx  = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  b->in_loc = 0;
  b->v_loc  = 0;
  b->n_loc  = 0;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
#endif

  B->symmetric                  = PETSC_TRUE;
  B->structurally_symmetric     = PETSC_TRUE;
  B->symmetric_set              = PETSC_TRUE;
  B->structurally_symmetric_set = PETSC_TRUE;

  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.

   This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
   and MATMPISBAIJ otherwise.

   Options Database Keys:
. -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateSBAIJ(), MATSEQSBAIJ, MATMPISBAIJ
M*/

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation"
/*@C
   MatMPISBAIJSetPreallocation - For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  B - the matrix
.  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
       blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local submatrix
           (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room
           for the diagonal entry and set a value even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix that is right of the diagonal
           (possibly different for each block row) or NULL.

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example, the fields mallocs, nz_allocated, nz_used, nz_unneeded;
   you can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |. . . d d d o o o o  o  o
   row 4  |. . . d d d o o o o  o  o
   row 5  |. . . d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d matrix is stored in
   MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.

   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
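   Example (an illustrative sketch; the counts assume a block-tridiagonal
   coupling pattern with bs=2 and 10 block rows per process, and comm is any
   MPI communicator):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,10*2,10*2,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(A,MATMPISBAIJ);
   /* upper triangle of the diagonal portion: the diagonal block plus one
      coupling block per block row; off-diagonal portion: at most one block */
   MatMPISBAIJSetPreallocation(A,2,2,NULL,1,NULL);
.ve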
   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateSBAIJ"
/*@C
   MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
       blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
      This value should be the same as the local size used in creating the
      y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
      This value should be the same as the local size used in creating the
      x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the diagonal portion of the local submatrix
           (possibly different for each block row) or NULL.
           If you plan to factor the matrix you must leave room for the diagonal entry and
           set its value even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
.   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqSBAIJ format on one processor)

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqSBAIJSetPreallocation()]
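   A sketch of that paradigm (both preallocation routines may be called; the
   one that does not match the instantiated type is simply ignored):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATSBAIJ);
   MatSetFromOptions(A);
   MatMPISBAIJSetPreallocation(A,bs,d_nz,d_nnz,o_nz,o_nnz);
   MatSeqSBAIJSetPreallocation(A,bs,d_nz,d_nnz);
.ve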
   Notes:
   The number of rows and columns must be divisible by blocksize.
   This matrix type does not support complex Hermitian operations.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |. . . d d d o o o o  o  o
   row 4  |. . . d d d o o o o  o  o
   row 5  |. . . d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d matrix is stored in
   MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
@*/

PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
    ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPISBAIJ"
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
  PetscScalar    *array;

  PetscFunctionBegin;
  *newmat = 0;

  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a      = (Mat_MPISBAIJ*)mat->data;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);

  ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);

  /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
  a->sMvctx = oldmat->sMvctx;
  ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);

  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
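/*
   Usage sketch for the loader below (illustrative; the file name is hypothetical):

     Mat         A;
     PetscViewer viewer;

     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetType(A,MATMPISBAIJ);
     MatLoad(A,viewer);   // dispatches to MatLoad_MPISBAIJ(); -matload_block_size sets the blocksize
     PetscViewerDestroy(&viewer);
*/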
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPISBAIJ"
PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
  PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
  PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
  PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp;
  int            fd;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  if (bs < 0) bs = 1;

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1];
  N    = header[2];

  /* If global sizes are set, check that they are consistent with those given in the file */
  if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
  if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);

  if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*Mbs;
  if (extra_rows == bs) extra_rows = 0;
  else Mbs++;
  if (extra_rows && !rank) {
    ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
    mbs = Mbs/size + ((Mbs % size) > rank);
    m   = mbs*bs;
  } else { /* user set */
    m   = newmat->rmap->n;
    mbs = m/bs;
  }
  ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
  ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
  ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
    ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) { /* procs[0] */
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j<rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i<extra_rows; i++) mycols[nz+i] = M+i;
    }

    /* read in everyone else's part (except the last proc's) and ship it off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else { /* procs[i], i>0 */
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) nz += locrowlens[i];
    ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
  ierr = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
  ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  rowcount = 0;
  nzcount  = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs; /* block col. index */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
          else masked1[dcount++] = tmp;                              /* entry in diag portion */
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;  /* d_nzz[i] */
    odlens[i] = odcount; /* o_nzz[i] */

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }
  ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
  ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
    }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }

    /* read in other processors (except the last one) and ship out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }

  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
/*XXXXX@
   MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Input Parameters:
+  mat - the matrix
-  fact - factor

   Not Collective on Mat, each process can have a different hash factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@XXXXX*/

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
  ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  bs  = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed */
  ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);

  /* row_max for B */
  if (rank != size-1) {
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col  = bcol + kcol;        /* local col index */
          col += rowners_bs[rank+1]; /* global col index */
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;      /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to its owners */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1] - rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    }
  }

  /* receive values */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank+1] - rowners_bs[rank];
    for (source=0; source<rank; source++) {
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
      /* process values */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
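/*
   Note on the communication pattern used by MatSOR_MPISBAIJ() below (an
   explanatory sketch, not authoritative documentation): with symmetric storage
   the operator is A = D + B + B^T, where D is the "diagonal" portion and only
   the off-diagonal portion B is stored.  Each sweep needs bb - B*x - B^T*x;
   B^T*x is computed locally into slvec0b and the values of x are packed into
   slvec0a, so a single forward scatter on sMvctx delivers both off-process
   contributions at once.
*/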
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2885 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2886 2887 if (flag == SOR_APPLY_UPPER) { 2888 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2889 PetscFunctionReturn(0); 2890 } 2891 2892 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2893 if (flag & SOR_ZERO_INITIAL_GUESS) { 2894 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2895 its--; 2896 } 2897 2898 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2899 while (its--) { 2900 2901 /* lower triangular part: slvec0b = - B^T*xx */ 2902 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2903 2904 /* copy xx into slvec0a */ 2905 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2906 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2907 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2908 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2909 2910 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2911 2912 /* copy bb into slvec1a */ 2913 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2914 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2915 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2916 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2917 2918 /* set slvec1b = 0 */ 2919 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2920 2921 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2922 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2923 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2924 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2925 2926 /* upper triangular part: bb1 = bb1 - B*x */ 2927 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2928 2929 /* local diagonal sweep */ 2930 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2931 } 2932 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2933 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2934 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2935 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2936 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2937 } else if (flag & SOR_EISENSTAT) { 2938 Vec xx1; 2939 PetscBool hasop; 2940 const PetscScalar *diag; 2941 PetscScalar *sl,scale = (omega - 2.0)/omega; 2942 PetscInt i,n; 2943 2944 if (!mat->xx1) { 2945 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2946 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2947 } 2948 xx1 = mat->xx1; 2949 bb1 = mat->bb1; 2950 2951 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2952 2953 if (!mat->diag) { 2954 /* this is wrong for same matrix with new nonzero values */ 2955 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 2956 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2957 } 2958 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2959 2960 if (hasop) { 2961 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2962 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2963 } else { 2964 /* 2965 These 
      /*
         These two lines are replaced by code that may be a bit faster for a good compiler
         ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
         ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
      */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
      if (omega == 1.0) {
        for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply off-diagonal portion of matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower triangular part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJWithArrays"
/*@
   MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based; the i array holds the offsets into the local j array.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
/*@C
   MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  bs - the block size
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJWithArrays"
/*@
     MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are offsets into the local j array.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
/*@C
   MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
   (block compressed row).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero) these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
@*/
PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
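/*
   An illustrative sketch (not part of the library): assembling a global symmetric
   tridiagonal matrix with MatCreateMPISBAIJWithArrays(), two rows per process and
   bs = 1 (the only supported block size).  Only the diagonal and superdiagonal are
   passed in, matching the upper-triangular storage of SBAIJ.  The function name
   ExampleCreateSBAIJFromCSR is hypothetical.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleCreateSBAIJFromCSR"
static PetscErrorCode ExampleCreateSBAIJFromCSR(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       rstart,Nglobal,ii[3],jj[4],cnt = 0,r,row;
  PetscScalar    aa[4];

  PetscFunctionBegin;
  ierr    = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr    = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  rstart  = 2*rank;        /* this process owns global rows rstart and rstart+1 */
  Nglobal = 2*size;
  ii[0]   = 0;
  for (r=0; r<2; r++) {
    row = rstart + r;
    jj[cnt] = row;  aa[cnt++] = 2.0;                             /* diagonal      */
    if (row+1 < Nglobal) { jj[cnt] = row+1; aa[cnt++] = -1.0; }  /* superdiagonal */
    ii[r+1] = cnt;
  }
  ierr = MatCreateMPISBAIJWithArrays(comm,1,2,2,Nglobal,Nglobal,ii,jj,aa,A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}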
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data;
    PetscInt     *dnz,*onz,sum,bs,cbs,mbs,Nbs;
    PetscInt     *bindx,rmax=a->rmax,j;

    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    mbs  = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
    }
    /* Check sum(n) = Nbs */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",Nbs);

    ierr    = MPI_Scan(&mbs,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= mbs;

    ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
    ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<mbs; i++) {
      ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
      nnz  = nnz/bs;
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
    }
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscFree(bindx);CHKERRQ(ierr);

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);

  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
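/*
   An illustrative sketch (not part of the library): the routine above is normally
   reached through the public MatCreateMPIMatConcatenateSeqMat() interface, which
   stacks one sequential matrix per process into a parallel matrix.  The function
   name ExampleConcatenateSeqSBAIJ is hypothetical, and seqA stands for a
   MATSEQSBAIJ matrix assumed already assembled on each process.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleConcatenateSeqSBAIJ"
static PetscErrorCode ExampleConcatenateSeqSBAIJ(MPI_Comm comm,Mat seqA,Mat *mpiA)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* PETSC_DECIDE lets PETSc choose the local column size; MAT_INITIAL_MATRIX
     triggers the symbolic (preallocation) phase implemented above */
  ierr = MatCreateMPIMatConcatenateSeqMat(comm,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,mpiA);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}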