1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 #if defined(PETSC_HAVE_ELEMENTAL) 8 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 9 #endif 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 12 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 13 { 14 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 15 PetscErrorCode ierr; 16 17 PetscFunctionBegin; 18 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 19 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 20 PetscFunctionReturn(0); 21 } 22 23 #undef __FUNCT__ 24 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 25 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 26 { 27 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 28 PetscErrorCode ierr; 29 30 PetscFunctionBegin; 31 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 32 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 33 PetscFunctionReturn(0); 34 } 35 36 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 37 { \ 38 \ 39 brow = row/bs; \ 40 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 41 rmax = aimax[brow]; nrow = ailen[brow]; \ 42 bcol = col/bs; \ 43 ridx = row % bs; cidx = col % bs; \ 44 low = 0; high = nrow; \ 45 while (high-low > 3) { \ 46 t = (low+high)/2; \ 47 if (rp[t] > bcol) high = t; \ 48 else low = t; \ 49 } \ 50 for (_i=low; _i<high; _i++) { \ 51 if (rp[_i] > bcol) break; \ 52 if (rp[_i] == bcol) { \ 53 bap = ap + bs2*_i + bs*cidx + ridx; \ 54 if (addv == ADD_VALUES) *bap += value; \ 55 else *bap = value; \ 56 goto a_noinsert; \ 57 } \ 58 } \ 59 if (a->nonew == 1) goto a_noinsert; \ 60 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 61 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 62 N = nrow++ - 1; \ 63 /* shift up all the later entries in this row */ \ 64 for (ii=N; ii>=_i; ii--) { \ 65 rp[ii+1] = rp[ii]; \ 66 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 67 } \ 68 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 69 rp[_i] = bcol; \ 70 ap[bs2*_i + bs*cidx + ridx] = value; \ 71 A->nonzerostate++;\ 72 a_noinsert:; \ 73 ailen[brow] = nrow; \ 74 } 75 76 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 77 { \ 78 brow = row/bs; \ 79 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 80 rmax = bimax[brow]; nrow = bilen[brow]; \ 81 bcol = col/bs; \ 82 ridx = row % bs; cidx = col % bs; \ 83 low = 0; high = nrow; \ 84 while (high-low > 3) { \ 85 t = (low+high)/2; \ 86 if (rp[t] > bcol) high = t; \ 87 else low = t; \ 88 } \ 89 for (_i=low; _i<high; _i++) { \ 90 if (rp[_i] > bcol) break; \ 91 if (rp[_i] == bcol) { \ 92 bap = ap + bs2*_i + bs*cidx + ridx; \ 93 if (addv == ADD_VALUES) *bap += value; \ 94 else *bap = value; \ 95 goto b_noinsert; \ 96 } \ 97 } \ 98 if (b->nonew == 1) goto b_noinsert; \ 99 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 100 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 101 N = nrow++ - 1; \ 102 /* shift up all the later entries in this row */ \ 103 for (ii=N; ii>=_i; ii--) { \ 104 rp[ii+1] = rp[ii]; \ 105 ierr = 
PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 106 } \ 107 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 108 rp[_i] = bcol; \ 109 ap[bs2*_i + bs*cidx + ridx] = value; \ 110 B->nonzerostate++;\ 111 b_noinsert:; \ 112 bilen[brow] = nrow; \ 113 } 114 115 /* Only add/insert a(i,j) with i<=j (blocks). 116 Any a(i,j) with i>j input by user is ingored. 117 */ 118 #undef __FUNCT__ 119 #define __FUNCT__ "MatSetValues_MPISBAIJ" 120 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 121 { 122 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 123 MatScalar value; 124 PetscBool roworiented = baij->roworiented; 125 PetscErrorCode ierr; 126 PetscInt i,j,row,col; 127 PetscInt rstart_orig=mat->rmap->rstart; 128 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 129 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 130 131 /* Some Variables required in the macro */ 132 Mat A = baij->A; 133 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 134 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 135 MatScalar *aa =a->a; 136 137 Mat B = baij->B; 138 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 139 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 140 MatScalar *ba =b->a; 141 142 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 143 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 144 MatScalar *ap,*bap; 145 146 /* for stash */ 147 PetscInt n_loc, *in_loc = NULL; 148 MatScalar *v_loc = NULL; 149 150 PetscFunctionBegin; 151 if (!baij->donotstash) { 152 if (n > baij->n_loc) { 153 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 154 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 155 ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr); 156 ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr); 157 158 baij->n_loc = n; 159 } 160 in_loc = baij->in_loc; 161 v_loc = baij->v_loc; 162 } 163 164 for (i=0; i<m; i++) { 165 if (im[i] < 0) continue; 166 #if defined(PETSC_USE_DEBUG) 167 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 168 #endif 169 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 170 row = im[i] - rstart_orig; /* local row index */ 171 for (j=0; j<n; j++) { 172 if (im[i]/bs > in[j]/bs) { 173 if (a->ignore_ltriangular) { 174 continue; /* ignore lower triangular blocks */ 175 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 176 } 177 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 178 col = in[j] - cstart_orig; /* local col index */ 179 brow = row/bs; bcol = col/bs; 180 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 181 if (roworiented) value = v[i*n+j]; 182 else value = v[i+j*m]; 183 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 184 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 185 } else if (in[j] < 0) continue; 186 #if defined(PETSC_USE_DEBUG) 187 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 188 #endif 189 else { /* off-diag entry (B) */ 190 if (mat->was_assembled) { 191 if (!baij->colmap) { 192 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 193 } 194 #if defined(PETSC_USE_CTABLE) 195 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 196 col = col - 1; 197 #else 198 col = baij->colmap[in[j]/bs] - 1; 199 #endif 200 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 201 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 202 col = in[j]; 203 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 204 B = baij->B; 205 b = (Mat_SeqBAIJ*)(B)->data; 206 bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 207 ba = b->a; 208 } else col += in[j]%bs; 209 } else col = in[j]; 210 if (roworiented) value = v[i*n+j]; 211 else value = v[i+j*m]; 212 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 213 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 214 } 215 } 216 } else { /* off processor entry */ 217 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 218 if (!baij->donotstash) { 219 mat->assembled = PETSC_FALSE; 220 n_loc = 0; 221 for (j=0; j<n; j++) { 222 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 223 in_loc[n_loc] = in[j]; 224 if (roworiented) { 225 v_loc[n_loc] = v[i*n+j]; 226 } else { 227 v_loc[n_loc] = v[j*m+i]; 228 } 229 n_loc++; 230 } 231 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ_Inlined" 240 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 241 { 242 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 243 PetscErrorCode ierr; 244 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 245 PetscInt *imax =a->imax,*ai=a->i,*ailen=a->ilen; 246 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 247 PetscBool roworiented=a->roworiented; 248 const PetscScalar *value = v; 249 MatScalar *ap,*aa = a->a,*bap; 250 251 PetscFunctionBegin; 252 if (col < row) { 253 if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */ 254 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 255 } 256 rp = aj + ai[row]; 257 ap = aa + bs2*ai[row]; 258 rmax = imax[row]; 259 nrow = ailen[row]; 260 value = v; 261 low = 0; 262 high = nrow; 263 264 while (high-low > 7) { 265 t = (low+high)/2; 266 if (rp[t] > col) high = t; 267 else low = t; 268 } 269 for (i=low; i<high; i++) { 270 if (rp[i] > col) break; 271 if (rp[i] == col) { 272 bap = ap + bs2*i; 273 if (roworiented) { 274 if (is == ADD_VALUES) { 275 for (ii=0; ii<bs; ii++) { 276 for (jj=ii; jj<bs2; jj+=bs) { 277 bap[jj] += *value++; 278 } 279 } 280 } else { 281 for (ii=0; ii<bs; ii++) { 282 for (jj=ii; jj<bs2; jj+=bs) { 283 bap[jj] = *value++; 284 } 285 } 286 } 287 } else { 288 if (is == ADD_VALUES) { 289 for (ii=0; ii<bs; ii++) { 290 for (jj=0; jj<bs; jj++) { 291 *bap++ += *value++; 292 } 293 } 294 } else { 295 for (ii=0; ii<bs; ii++) { 296 for (jj=0; jj<bs; jj++) { 297 *bap++ = *value++; 298 } 299 } 300 } 301 } 302 goto noinsert2; 303 } 304 } 305 if (nonew == 1) goto noinsert2; 306 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol); 307 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 308 N = nrow++ - 1; high++; 309 /* shift up all the later entries in this row */ 310 for (ii=N; ii>=i; ii--) { 311 rp[ii+1] = rp[ii]; 312 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 313 } 314 if (N >= i) { 315 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 316 } 317 rp[i] = col; 318 bap = ap + bs2*i; 319 if (roworiented) { 320 for (ii=0; ii<bs; ii++) { 321 for (jj=ii; jj<bs2; jj+=bs) { 322 bap[jj] = *value++; 323 } 324 } 325 } else { 326 for (ii=0; ii<bs; ii++) { 327 for (jj=0; jj<bs; jj++) { 328 *bap++ = *value++; 329 } 330 } 331 } 332 noinsert2:; 333 ailen[row] = nrow; 334 PetscFunctionReturn(0); 335 } 336 337 #undef __FUNCT__ 338 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined" 339 /* 340 This routine is exactly duplicated in mpibaij.c 341 */ 342 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 343 { 344 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 345 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 346 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 347 PetscErrorCode ierr; 348 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 349 PetscBool roworiented=a->roworiented; 350 const PetscScalar *value = v; 351 MatScalar *ap,*aa = a->a,*bap; 352 353 PetscFunctionBegin; 354 rp = aj + ai[row]; 355 ap = aa + bs2*ai[row]; 356 rmax = imax[row]; 357 nrow = ailen[row]; 358 low = 0; 359 high = nrow; 360 value = v; 361 while (high-low > 7) { 362 t = (low+high)/2; 363 if (rp[t] > col) high = t; 364 else low = t; 365 } 366 for (i=low; i<high; i++) { 367 if (rp[i] > col) break; 368 if (rp[i] == col) { 369 bap = ap + bs2*i; 370 if (roworiented) { 371 if (is == ADD_VALUES) { 372 for (ii=0; ii<bs; ii++) { 373 for (jj=ii; jj<bs2; jj+=bs) { 374 bap[jj] += *value++; 375 } 376 } 377 } else { 378 for (ii=0; ii<bs; ii++) { 379 for (jj=ii; jj<bs2; jj+=bs) { 380 bap[jj] = *value++; 381 } 382 } 383 } 384 } else { 385 if (is == ADD_VALUES) { 386 for (ii=0; ii<bs; ii++,value+=bs) { 387 for (jj=0; jj<bs; jj++) { 388 bap[jj] += value[jj]; 389 } 390 bap += bs; 391 } 392 } else { 393 for (ii=0; ii<bs; ii++,value+=bs) { 394 for 
(jj=0; jj<bs; jj++) { 395 bap[jj] = value[jj]; 396 } 397 bap += bs; 398 } 399 } 400 } 401 goto noinsert2; 402 } 403 } 404 if (nonew == 1) goto noinsert2; 405 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol); 406 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 407 N = nrow++ - 1; high++; 408 /* shift up all the later entries in this row */ 409 for (ii=N; ii>=i; ii--) { 410 rp[ii+1] = rp[ii]; 411 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 412 } 413 if (N >= i) { 414 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 415 } 416 rp[i] = col; 417 bap = ap + bs2*i; 418 if (roworiented) { 419 for (ii=0; ii<bs; ii++) { 420 for (jj=ii; jj<bs2; jj+=bs) { 421 bap[jj] = *value++; 422 } 423 } 424 } else { 425 for (ii=0; ii<bs; ii++) { 426 for (jj=0; jj<bs; jj++) { 427 *bap++ = *value++; 428 } 429 } 430 } 431 noinsert2:; 432 ailen[row] = nrow; 433 PetscFunctionReturn(0); 434 } 435 436 #undef __FUNCT__ 437 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 438 /* 439 This routine could be optimized by removing the need for the block copy below and passing stride information 440 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 441 */ 442 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 443 { 444 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 445 const MatScalar *value; 446 MatScalar *barray =baij->barray; 447 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 448 PetscErrorCode ierr; 449 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 450 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 451 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 452 453 PetscFunctionBegin; 454 if (!barray) { 455 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 456 baij->barray = barray; 457 } 458 459 if (roworiented) { 460 stepval = (n-1)*bs; 461 } else { 462 stepval = (m-1)*bs; 463 } 464 for (i=0; i<m; i++) { 465 if (im[i] < 0) continue; 466 #if defined(PETSC_USE_DEBUG) 467 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1); 468 #endif 469 if (im[i] >= rstart && im[i] < rend) { 470 row = im[i] - rstart; 471 for (j=0; j<n; j++) { 472 if (im[i] > in[j]) { 473 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 474 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 475 } 476 /* If NumCol = 1 then a copy is not required */ 477 if ((roworiented) && (n == 1)) { 478 barray = (MatScalar*) v + i*bs2; 479 } else if ((!roworiented) && (m == 1)) { 480 barray = (MatScalar*) v + j*bs2; 481 } else { /* Here a copy is required */ 482 if (roworiented) { 483 value = v + i*(stepval+bs)*bs + j*bs; 484 } else { 485 value = v + j*(stepval+bs)*bs + i*bs; 486 } 487 for (ii=0; ii<bs; ii++,value+=stepval) { 488 for (jj=0; jj<bs; jj++) { 489 *barray++ = *value++; 490 } 491 } 492 barray -=bs2; 493 } 494 495 if (in[j] >= cstart && in[j] < cend) { 496 col = in[j] - cstart; 497 ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 498 } else if (in[j] < 0) continue; 499 #if defined(PETSC_USE_DEBUG) 500 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1); 501 #endif 502 else { 503 if (mat->was_assembled) { 504 if (!baij->colmap) { 505 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 506 } 507 508 #if defined(PETSC_USE_DEBUG) 509 #if defined(PETSC_USE_CTABLE) 510 { PetscInt data; 511 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 512 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 513 } 514 #else 515 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 516 #endif 517 #endif 518 #if defined(PETSC_USE_CTABLE) 519 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 520 col = (col - 1)/bs; 521 #else 522 col = (baij->colmap[in[j]] - 1)/bs; 523 #endif 524 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 525 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 526 col = in[j]; 527 } 528 } else col = in[j]; 529 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 530 } 531 } 532 } else { 533 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 534 if (!baij->donotstash) { 535 if (roworiented) { 536 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 537 } else { 538 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 539 } 540 } 541 } 542 } 543 PetscFunctionReturn(0); 544 } 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatGetValues_MPISBAIJ" 548 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 549 { 550 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 551 PetscErrorCode ierr; 552 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 553 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 554 555 PetscFunctionBegin; 556 for (i=0; i<m; i++) { 557 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 558 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 559 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 560 row = idxm[i] - bsrstart; 561 for (j=0; j<n; j++) { 562 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 563 if 
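      /* Columns that fall outside this process's diagonal block are looked up through
         baij->colmap/garray below; entries not present in the assembled off-diagonal
         pattern are simply returned as 0.0. */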
(idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
      ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;  /* square of norm(A) */
      ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;  /* square of norm(B) */
      ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = PetscSqrtReal(sum[0] + 2*sum[1]);
      ierr = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
      ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else
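    /* Note: the Frobenius branch above counts the off-diagonal block B twice
       (sum[0] + 2*sum[1]) because only one triangle of the symmetric matrix is stored;
       for a symmetric matrix the 1-norm and infinity-norm coincide, which is why
       NORM_1 and NORM_INFINITY share the max row/column sum code. Any other norm type
       falls through to the error below. */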
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 660 } 661 PetscFunctionReturn(0); 662 } 663 664 #undef __FUNCT__ 665 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 666 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 667 { 668 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 669 PetscErrorCode ierr; 670 PetscInt nstash,reallocs; 671 InsertMode addv; 672 673 PetscFunctionBegin; 674 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 675 676 /* make sure all processors are either in INSERTMODE or ADDMODE */ 677 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 678 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 679 mat->insertmode = addv; /* in case this processor had no cache */ 680 681 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 682 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 683 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 684 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 685 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 686 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 687 PetscFunctionReturn(0); 688 } 689 690 #undef __FUNCT__ 691 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 692 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 693 { 694 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 695 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data; 696 PetscErrorCode ierr; 697 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 698 PetscInt *row,*col; 699 PetscBool other_disassembled; 700 PetscMPIInt n; 701 PetscBool r1,r2,r3; 702 MatScalar *val; 703 InsertMode addv = mat->insertmode; 704 705 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 706 PetscFunctionBegin; 707 if (!baij->donotstash && !mat->nooffprocentries) { 708 while (1) { 709 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 710 if (!flg) break; 711 712 for (i=0; i<n;) { 713 /* Now identify the consecutive vals belonging to the same row */ 714 for (j=i,rstart=row[j]; j<n; j++) { 715 if (row[j] != rstart) break; 716 } 717 if (j < n) ncols = j-i; 718 else ncols = n-i; 719 /* Now assemble all these values with a single function call */ 720 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 721 i = j; 722 } 723 } 724 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 725 /* Now process the block-stash. 
Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this step
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer);
extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr =
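    /* PETSC_VIEWER_ASCII_INFO_DETAIL prints per-process storage statistics only;
       the default ASCII path further below gathers the entire matrix onto rank 0
       before printing, which is only practical for small matrices. */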
PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 813 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 814 MatInfo info; 815 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 816 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 817 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 818 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 819 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 820 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 821 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 822 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 823 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 824 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 825 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 826 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 827 PetscFunctionReturn(0); 828 } else if (format == PETSC_VIEWER_ASCII_INFO) { 829 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 830 PetscFunctionReturn(0); 831 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 832 PetscFunctionReturn(0); 833 } 834 } 835 836 if (isdraw) { 837 PetscDraw draw; 838 PetscBool isnull; 839 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 840 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 841 } 842 843 { 844 /* assemble the entire matrix onto first processor. */ 845 Mat A; 846 Mat_SeqSBAIJ *Aloc; 847 Mat_SeqBAIJ *Bloc; 848 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 849 MatScalar *a; 850 const char *matname; 851 852 /* Should this be the same type as mat? 
*/ 853 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 854 if (!rank) { 855 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 856 } else { 857 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 858 } 859 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 860 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 861 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 862 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 863 864 /* copy over the A part */ 865 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 866 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 867 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 868 869 for (i=0; i<mbs; i++) { 870 rvals[0] = bs*(baij->rstartbs + i); 871 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 872 for (j=ai[i]; j<ai[i+1]; j++) { 873 col = (baij->cstartbs+aj[j])*bs; 874 for (k=0; k<bs; k++) { 875 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 876 col++; 877 a += bs; 878 } 879 } 880 } 881 /* copy over the B part */ 882 Bloc = (Mat_SeqBAIJ*)baij->B->data; 883 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 884 for (i=0; i<mbs; i++) { 885 886 rvals[0] = bs*(baij->rstartbs + i); 887 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 888 for (j=ai[i]; j<ai[i+1]; j++) { 889 col = baij->garray[aj[j]]*bs; 890 for (k=0; k<bs; k++) { 891 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 892 col++; 893 a += bs; 894 } 895 } 896 } 897 ierr = PetscFree(rvals);CHKERRQ(ierr); 898 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 899 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 900 /* 901 Everyone has to call to draw the matrix since the graphics waits are 902 synchronized across all processors that share the PetscDraw object 903 */ 904 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 905 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 906 if (!rank) { 907 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 908 ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 909 } 910 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 911 ierr = MatDestroy(&A);CHKERRQ(ierr); 912 } 913 PetscFunctionReturn(0); 914 } 915 916 #undef __FUNCT__ 917 #define __FUNCT__ "MatView_MPISBAIJ" 918 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 919 { 920 PetscErrorCode ierr; 921 PetscBool iascii,isdraw,issocket,isbinary; 922 923 PetscFunctionBegin; 924 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 925 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 926 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 927 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 928 if (iascii || isdraw || issocket || isbinary) { 929 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 930 } 931 PetscFunctionReturn(0); 932 } 933 934 #undef __FUNCT__ 935 #define __FUNCT__ "MatDestroy_MPISBAIJ" 936 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 937 { 938 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 939 PetscErrorCode ierr; 940 941 PetscFunctionBegin; 942 #if defined(PETSC_USE_LOG) 943 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 944 #endif 945 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 946 ierr = 
MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 947 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 948 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 949 #if defined(PETSC_USE_CTABLE) 950 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 951 #else 952 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 953 #endif 954 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 955 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 956 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 957 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 958 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 959 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 960 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 961 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 962 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 963 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 964 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 965 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 966 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 967 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 968 ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 969 #if defined(PETSC_USE_REAL_MAT_SINGLE) 970 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 971 #endif 972 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 973 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 974 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 975 ierr = PetscFree(mat->data);CHKERRQ(ierr); 976 977 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 978 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 979 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 980 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 981 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 982 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr); 983 #if defined(PETSC_HAVE_ELEMENTAL) 984 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr); 985 #endif 986 PetscFunctionReturn(0); 987 } 988 989 #undef __FUNCT__ 990 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian" 991 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 992 { 993 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 994 PetscErrorCode ierr; 995 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 996 PetscScalar *x,*from; 997 998 PetscFunctionBegin; 999 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1000 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1001 1002 /* diagonal part */ 1003 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1004 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1005 1006 /* subdiagonal part */ 1007 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1008 1009 /* copy x into the vec slvec0 */ 1010 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1011 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1012 1013 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1014 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1015 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1016 1017 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1018 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1019 /* supperdiagonal part */ 1020 ierr = 
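  /* Final step of the symmetric multiply: slvec1a already holds A_diag*x plus the
     B^H*x contributions received from other processes through the ADD_VALUES scatter,
     while slvec1b holds the ghost values of x, so yy = B*slvec1b + slvec1a completes
     y = A*x even though only the upper triangle is stored. */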
(*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from,zero=0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr
= (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1109 1110 /* copy x into the vec slvec0 */ 1111 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1112 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1113 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1114 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1115 1116 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1117 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1118 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1119 1120 /* supperdiagonal part */ 1121 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1122 PetscFunctionReturn(0); 1123 } 1124 1125 #undef __FUNCT__ 1126 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 1127 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1128 { 1129 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1130 PetscErrorCode ierr; 1131 1132 PetscFunctionBegin; 1133 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1134 /* do diagonal part */ 1135 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1136 /* do supperdiagonal part */ 1137 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1138 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1139 1140 /* do subdiagonal part */ 1141 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1142 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1143 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1144 PetscFunctionReturn(0); 1145 } 1146 1147 /* 1148 This only works correctly for square matrices where the subblock A->A is the 1149 diagonal block 1150 */ 1151 #undef __FUNCT__ 1152 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 1153 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1154 { 1155 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1156 PetscErrorCode ierr; 1157 1158 PetscFunctionBegin; 1159 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1160 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1161 PetscFunctionReturn(0); 1162 } 1163 1164 #undef __FUNCT__ 1165 #define __FUNCT__ "MatScale_MPISBAIJ" 1166 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1167 { 1168 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1169 PetscErrorCode ierr; 1170 1171 PetscFunctionBegin; 1172 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1173 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1174 PetscFunctionReturn(0); 1175 } 1176 1177 #undef __FUNCT__ 1178 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1179 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1180 { 1181 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1182 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1183 PetscErrorCode ierr; 1184 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1185 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1186 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1187 1188 PetscFunctionBegin; 1189 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1190 mat->getrowactive = PETSC_TRUE; 1191 1192 if (!mat->rowvalues && (idx || v)) { 1193 /* 1194 allocate enough space to hold information from the longest row. 
1195 */ 1196 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1197 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1198 PetscInt max = 1,mbs = mat->mbs,tmp; 1199 for (i=0; i<mbs; i++) { 1200 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1201 if (max < tmp) max = tmp; 1202 } 1203 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1204 } 1205 1206 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1207 lrow = row - brstart; /* local row index */ 1208 1209 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1210 if (!v) {pvA = 0; pvB = 0;} 1211 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1212 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1213 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1214 nztot = nzA + nzB; 1215 1216 cmap = mat->garray; 1217 if (v || idx) { 1218 if (nztot) { 1219 /* Sort by increasing column numbers, assuming A and B already sorted */ 1220 PetscInt imark = -1; 1221 if (v) { 1222 *v = v_p = mat->rowvalues; 1223 for (i=0; i<nzB; i++) { 1224 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1225 else break; 1226 } 1227 imark = i; 1228 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1229 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1230 } 1231 if (idx) { 1232 *idx = idx_p = mat->rowindices; 1233 if (imark > -1) { 1234 for (i=0; i<imark; i++) { 1235 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1236 } 1237 } else { 1238 for (i=0; i<nzB; i++) { 1239 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1240 else break; 1241 } 1242 imark = i; 1243 } 1244 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1245 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1246 } 1247 } else { 1248 if (idx) *idx = 0; 1249 if (v) *v = 0; 1250 } 1251 } 1252 *nz = nztot; 1253 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1254 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1260 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1261 { 1262 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1263 1264 PetscFunctionBegin; 1265 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1266 baij->getrowactive = PETSC_FALSE; 1267 PetscFunctionReturn(0); 1268 } 1269 1270 #undef __FUNCT__ 1271 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1272 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1273 { 1274 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1275 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1276 1277 PetscFunctionBegin; 1278 aA->getrow_utriangular = PETSC_TRUE; 1279 PetscFunctionReturn(0); 1280 } 1281 #undef __FUNCT__ 1282 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1283 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1284 { 1285 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1286 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1287 1288 PetscFunctionBegin; 1289 aA->getrow_utriangular = PETSC_FALSE; 1290 PetscFunctionReturn(0); 1291 } 1292 1293 #undef __FUNCT__ 1294 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1295 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1296 { 1297 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1298 PetscErrorCode ierr; 1299 1300 PetscFunctionBegin; 1301 ierr = 
MatRealPart(a->A);CHKERRQ(ierr); 1302 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1303 PetscFunctionReturn(0); 1304 } 1305 1306 #undef __FUNCT__ 1307 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1308 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1309 { 1310 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1311 PetscErrorCode ierr; 1312 1313 PetscFunctionBegin; 1314 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1315 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1316 PetscFunctionReturn(0); 1317 } 1318 1319 /* Check if isrow is a subset of iscol_local, called by MatGetSubMatrix_MPISBAIJ() 1320 Input: isrow - distributed(parallel), 1321 iscol_local - locally owned (seq) 1322 */ 1323 #undef __FUNCT__ 1324 #define __FUNCT__ "ISEqual_private" 1325 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg) 1326 { 1327 PetscErrorCode ierr; 1328 PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch; 1329 const PetscInt *ptr1,*ptr2; 1330 1331 PetscFunctionBegin; 1332 ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr); 1333 ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr); 1334 if (sz1 > sz2) { 1335 *flg = PETSC_FALSE; 1336 PetscFunctionReturn(0); 1337 } 1338 1339 ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr); 1340 ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1341 1342 ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr); 1343 ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr); 1344 ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr); 1345 ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr); 1346 ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr); 1347 ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr); 1348 1349 nmatch=0; 1350 k = 0; 1351 for (i=0; i<sz1; i++){ 1352 for (j=k; j<sz2; j++){ 1353 if (a1[i] == a2[j]) { 1354 k = j; nmatch++; 1355 break; 1356 } 1357 } 1358 } 1359 ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr); 1360 ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1361 ierr = PetscFree(a1);CHKERRQ(ierr); 1362 ierr = PetscFree(a2);CHKERRQ(ierr); 1363 if (nmatch < sz1) { 1364 *flg = PETSC_FALSE; 1365 } else { 1366 *flg = PETSC_TRUE; 1367 } 1368 PetscFunctionReturn(0); 1369 } 1370 1371 #undef __FUNCT__ 1372 #define __FUNCT__ "MatGetSubMatrix_MPISBAIJ" 1373 PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1374 { 1375 PetscErrorCode ierr; 1376 IS iscol_local; 1377 PetscInt csize; 1378 PetscBool isequal; 1379 1380 PetscFunctionBegin; 1381 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1382 if (call == MAT_REUSE_MATRIX) { 1383 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1384 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1385 } else { 1386 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1387 ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr); 1388 if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow"); 1389 } 1390 1391 /* now call MatGetSubMatrix_MPIBAIJ() */ 1392 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1393 if (call == MAT_INITIAL_MATRIX) { 1394 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1395 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1396 } 1397 PetscFunctionReturn(0); 1398 } 1399 1400 #undef __FUNCT__ 1401 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1402 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1403 { 1404 Mat_MPISBAIJ *l = 
(Mat_MPISBAIJ*)A->data; 1405 PetscErrorCode ierr; 1406 1407 PetscFunctionBegin; 1408 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1409 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1410 PetscFunctionReturn(0); 1411 } 1412 1413 #undef __FUNCT__ 1414 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1415 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1416 { 1417 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1418 Mat A = a->A,B = a->B; 1419 PetscErrorCode ierr; 1420 PetscReal isend[5],irecv[5]; 1421 1422 PetscFunctionBegin; 1423 info->block_size = (PetscReal)matin->rmap->bs; 1424 1425 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1426 1427 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1428 isend[3] = info->memory; isend[4] = info->mallocs; 1429 1430 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1431 1432 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1433 isend[3] += info->memory; isend[4] += info->mallocs; 1434 if (flag == MAT_LOCAL) { 1435 info->nz_used = isend[0]; 1436 info->nz_allocated = isend[1]; 1437 info->nz_unneeded = isend[2]; 1438 info->memory = isend[3]; 1439 info->mallocs = isend[4]; 1440 } else if (flag == MAT_GLOBAL_MAX) { 1441 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1442 1443 info->nz_used = irecv[0]; 1444 info->nz_allocated = irecv[1]; 1445 info->nz_unneeded = irecv[2]; 1446 info->memory = irecv[3]; 1447 info->mallocs = irecv[4]; 1448 } else if (flag == MAT_GLOBAL_SUM) { 1449 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1450 1451 info->nz_used = irecv[0]; 1452 info->nz_allocated = irecv[1]; 1453 info->nz_unneeded = irecv[2]; 1454 info->memory = irecv[3]; 1455 info->mallocs = irecv[4]; 1456 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1457 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1458 info->fill_ratio_needed = 0; 1459 info->factor_mallocs = 0; 1460 PetscFunctionReturn(0); 1461 } 1462 1463 #undef __FUNCT__ 1464 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1465 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1466 { 1467 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1468 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1469 PetscErrorCode ierr; 1470 1471 PetscFunctionBegin; 1472 switch (op) { 1473 case MAT_NEW_NONZERO_LOCATIONS: 1474 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1475 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1476 case MAT_KEEP_NONZERO_PATTERN: 1477 case MAT_NEW_NONZERO_LOCATION_ERR: 1478 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1479 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1480 break; 1481 case MAT_ROW_ORIENTED: 1482 a->roworiented = flg; 1483 1484 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1485 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1486 break; 1487 case MAT_NEW_DIAGONALS: 1488 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1489 break; 1490 case MAT_IGNORE_OFF_PROC_ENTRIES: 1491 a->donotstash = flg; 1492 break; 1493 case MAT_USE_HASH_TABLE: 1494 a->ht_flag = flg; 1495 break; 1496 case MAT_HERMITIAN: 1497 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1498 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1499 1500 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1501 break; 1502 case MAT_SPD: 1503 A->spd_set = PETSC_TRUE; 1504 A->spd = flg; 1505 if (flg) { 
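      /* Marking the matrix as SPD also implies (structural) symmetry, so those flags are
         set below as well.  A typical user call would be
             MatSetOption(A,MAT_SPD,PETSC_TRUE);
         so that solvers can exploit the positive definiteness. */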
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || *B != A) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a     = baij->A, b = baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscBool      flg;

  PetscFunctionBegin;
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPISBAIJ"
PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

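  /* Equality is checked locally on the diagonal (A) and off-diagonal (B) pieces and then
     combined across all processes with a logical AND reduction, so *flag is the same on
     every rank. */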
PetscFunctionBegin; 1611 a = matA->A; b = matA->B; 1612 c = matB->A; d = matB->B; 1613 1614 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1615 if (flg) { 1616 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1617 } 1618 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1619 PetscFunctionReturn(0); 1620 } 1621 1622 #undef __FUNCT__ 1623 #define __FUNCT__ "MatCopy_MPISBAIJ" 1624 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1625 { 1626 PetscErrorCode ierr; 1627 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1628 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1629 1630 PetscFunctionBegin; 1631 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1632 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1633 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1634 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1635 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1636 } else { 1637 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1638 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1639 } 1640 PetscFunctionReturn(0); 1641 } 1642 1643 #undef __FUNCT__ 1644 #define __FUNCT__ "MatSetUp_MPISBAIJ" 1645 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1646 { 1647 PetscErrorCode ierr; 1648 1649 PetscFunctionBegin; 1650 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1651 PetscFunctionReturn(0); 1652 } 1653 1654 #undef __FUNCT__ 1655 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1656 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1657 { 1658 PetscErrorCode ierr; 1659 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1660 PetscBLASInt bnz,one=1; 1661 Mat_SeqSBAIJ *xa,*ya; 1662 Mat_SeqBAIJ *xb,*yb; 1663 1664 PetscFunctionBegin; 1665 if (str == SAME_NONZERO_PATTERN) { 1666 PetscScalar alpha = a; 1667 xa = (Mat_SeqSBAIJ*)xx->A->data; 1668 ya = (Mat_SeqSBAIJ*)yy->A->data; 1669 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1670 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1671 xb = (Mat_SeqBAIJ*)xx->B->data; 1672 yb = (Mat_SeqBAIJ*)yy->B->data; 1673 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1674 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1675 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1676 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1677 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1678 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1679 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 1680 } else { 1681 Mat B; 1682 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1683 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1684 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1685 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 1686 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1687 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1688 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1689 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1690 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1691 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1692 ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 1693 ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1694 ierr 
= MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1695 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1696 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1697 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 1698 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1699 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1700 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1701 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1702 } 1703 PetscFunctionReturn(0); 1704 } 1705 1706 #undef __FUNCT__ 1707 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1708 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1709 { 1710 PetscErrorCode ierr; 1711 PetscInt i; 1712 PetscBool flg; 1713 1714 PetscFunctionBegin; 1715 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1716 for (i=0; i<n; i++) { 1717 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1718 if (!flg) { /* *B[i] is non-symmetric, set flag */ 1719 ierr = MatSetOption(*B[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr); 1720 } 1721 } 1722 PetscFunctionReturn(0); 1723 } 1724 1725 /* -------------------------------------------------------------------*/ 1726 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1727 MatGetRow_MPISBAIJ, 1728 MatRestoreRow_MPISBAIJ, 1729 MatMult_MPISBAIJ, 1730 /* 4*/ MatMultAdd_MPISBAIJ, 1731 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1732 MatMultAdd_MPISBAIJ, 1733 0, 1734 0, 1735 0, 1736 /* 10*/ 0, 1737 0, 1738 0, 1739 MatSOR_MPISBAIJ, 1740 MatTranspose_MPISBAIJ, 1741 /* 15*/ MatGetInfo_MPISBAIJ, 1742 MatEqual_MPISBAIJ, 1743 MatGetDiagonal_MPISBAIJ, 1744 MatDiagonalScale_MPISBAIJ, 1745 MatNorm_MPISBAIJ, 1746 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1747 MatAssemblyEnd_MPISBAIJ, 1748 MatSetOption_MPISBAIJ, 1749 MatZeroEntries_MPISBAIJ, 1750 /* 24*/ 0, 1751 0, 1752 0, 1753 0, 1754 0, 1755 /* 29*/ MatSetUp_MPISBAIJ, 1756 0, 1757 0, 1758 0, 1759 0, 1760 /* 34*/ MatDuplicate_MPISBAIJ, 1761 0, 1762 0, 1763 0, 1764 0, 1765 /* 39*/ MatAXPY_MPISBAIJ, 1766 MatGetSubMatrices_MPISBAIJ, 1767 MatIncreaseOverlap_MPISBAIJ, 1768 MatGetValues_MPISBAIJ, 1769 MatCopy_MPISBAIJ, 1770 /* 44*/ 0, 1771 MatScale_MPISBAIJ, 1772 0, 1773 0, 1774 0, 1775 /* 49*/ 0, 1776 0, 1777 0, 1778 0, 1779 0, 1780 /* 54*/ 0, 1781 0, 1782 MatSetUnfactored_MPISBAIJ, 1783 0, 1784 MatSetValuesBlocked_MPISBAIJ, 1785 /* 59*/ MatGetSubMatrix_MPISBAIJ, 1786 0, 1787 0, 1788 0, 1789 0, 1790 /* 64*/ 0, 1791 0, 1792 0, 1793 0, 1794 0, 1795 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1796 0, 1797 0, 1798 0, 1799 0, 1800 /* 74*/ 0, 1801 0, 1802 0, 1803 0, 1804 0, 1805 /* 79*/ 0, 1806 0, 1807 0, 1808 0, 1809 MatLoad_MPISBAIJ, 1810 /* 84*/ 0, 1811 0, 1812 0, 1813 0, 1814 0, 1815 /* 89*/ 0, 1816 0, 1817 0, 1818 0, 1819 0, 1820 /* 94*/ 0, 1821 0, 1822 0, 1823 0, 1824 0, 1825 /* 99*/ 0, 1826 0, 1827 0, 1828 0, 1829 0, 1830 /*104*/ 0, 1831 MatRealPart_MPISBAIJ, 1832 MatImaginaryPart_MPISBAIJ, 1833 MatGetRowUpperTriangular_MPISBAIJ, 1834 MatRestoreRowUpperTriangular_MPISBAIJ, 1835 /*109*/ 0, 1836 0, 1837 0, 1838 0, 1839 0, 1840 /*114*/ 0, 1841 0, 1842 0, 1843 0, 1844 0, 1845 /*119*/ 0, 1846 0, 1847 0, 1848 0, 1849 0, 1850 /*124*/ 0, 1851 0, 1852 0, 1853 0, 1854 0, 1855 /*129*/ 0, 1856 0, 1857 0, 1858 0, 1859 0, 1860 /*134*/ 0, 1861 0, 1862 0, 1863 0, 1864 0, 1865 /*139*/ 0, 1866 0, 1867 0, 1868 0, 1869 0, 1870 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 1871 }; 1872 1873 #undef __FUNCT__ 1874 #define __FUNCT__ 
"MatGetDiagonalBlock_MPISBAIJ" 1875 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1876 { 1877 PetscFunctionBegin; 1878 *a = ((Mat_MPISBAIJ*)A->data)->A; 1879 PetscFunctionReturn(0); 1880 } 1881 1882 #undef __FUNCT__ 1883 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1884 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 1885 { 1886 Mat_MPISBAIJ *b; 1887 PetscErrorCode ierr; 1888 PetscInt i,mbs,Mbs; 1889 1890 PetscFunctionBegin; 1891 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 1892 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1893 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1894 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1895 1896 b = (Mat_MPISBAIJ*)B->data; 1897 mbs = B->rmap->n/bs; 1898 Mbs = B->rmap->N/bs; 1899 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1900 1901 B->rmap->bs = bs; 1902 b->bs2 = bs*bs; 1903 b->mbs = mbs; 1904 b->Mbs = Mbs; 1905 b->nbs = B->cmap->n/bs; 1906 b->Nbs = B->cmap->N/bs; 1907 1908 for (i=0; i<=b->size; i++) { 1909 b->rangebs[i] = B->rmap->range[i]/bs; 1910 } 1911 b->rstartbs = B->rmap->rstart/bs; 1912 b->rendbs = B->rmap->rend/bs; 1913 1914 b->cstartbs = B->cmap->rstart/bs; 1915 b->cendbs = B->cmap->rend/bs; 1916 1917 if (!B->preallocated) { 1918 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1919 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1920 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1921 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 1922 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1923 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1924 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1925 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 1926 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 1927 } 1928 1929 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1930 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1931 1932 B->preallocated = PETSC_TRUE; 1933 PetscFunctionReturn(0); 1934 } 1935 1936 #undef __FUNCT__ 1937 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ" 1938 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1939 { 1940 PetscInt m,rstart,cstart,cend; 1941 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 1942 const PetscInt *JJ =0; 1943 PetscScalar *values=0; 1944 PetscErrorCode ierr; 1945 1946 PetscFunctionBegin; 1947 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 1948 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 1949 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 1950 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1951 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1952 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1953 m = B->rmap->n/bs; 1954 rstart = B->rmap->rstart/bs; 1955 cstart = B->cmap->rstart/bs; 1956 cend = B->cmap->rend/bs; 1957 1958 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 1959 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 1960 for (i=0; i<m; i++) { 1961 nz = 
ii[i+1] - ii[i]; 1962 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 1963 nz_max = PetscMax(nz_max,nz); 1964 JJ = jj + ii[i]; 1965 for (j=0; j<nz; j++) { 1966 if (*JJ >= cstart) break; 1967 JJ++; 1968 } 1969 d = 0; 1970 for (; j<nz; j++) { 1971 if (*JJ++ >= cend) break; 1972 d++; 1973 } 1974 d_nnz[i] = d; 1975 o_nnz[i] = nz - d; 1976 } 1977 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1978 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 1979 1980 values = (PetscScalar*)V; 1981 if (!values) { 1982 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 1983 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 1984 } 1985 for (i=0; i<m; i++) { 1986 PetscInt row = i + rstart; 1987 PetscInt ncols = ii[i+1] - ii[i]; 1988 const PetscInt *icols = jj + ii[i]; 1989 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 1990 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 1991 } 1992 1993 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 1994 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1995 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1996 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1997 PetscFunctionReturn(0); 1998 } 1999 2000 /*MC 2001 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2002 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2003 the matrix is stored. 2004 2005 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2006 can call MatSetOption(Mat, MAT_HERMITIAN); 2007 2008 Options Database Keys: 2009 . 
-mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 2010 2011 Level: beginner 2012 2013 .seealso: MatCreateMPISBAIJ 2014 M*/ 2015 2016 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*); 2017 2018 #undef __FUNCT__ 2019 #define __FUNCT__ "MatCreate_MPISBAIJ" 2020 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2021 { 2022 Mat_MPISBAIJ *b; 2023 PetscErrorCode ierr; 2024 PetscBool flg = PETSC_FALSE; 2025 2026 PetscFunctionBegin; 2027 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2028 B->data = (void*)b; 2029 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2030 2031 B->ops->destroy = MatDestroy_MPISBAIJ; 2032 B->ops->view = MatView_MPISBAIJ; 2033 B->assembled = PETSC_FALSE; 2034 B->insertmode = NOT_SET_VALUES; 2035 2036 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2037 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2038 2039 /* build local table of row and column ownerships */ 2040 ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 2041 2042 /* build cache for off array entries formed */ 2043 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2044 2045 b->donotstash = PETSC_FALSE; 2046 b->colmap = NULL; 2047 b->garray = NULL; 2048 b->roworiented = PETSC_TRUE; 2049 2050 /* stuff used in block assembly */ 2051 b->barray = 0; 2052 2053 /* stuff used for matrix vector multiply */ 2054 b->lvec = 0; 2055 b->Mvctx = 0; 2056 b->slvec0 = 0; 2057 b->slvec0b = 0; 2058 b->slvec1 = 0; 2059 b->slvec1a = 0; 2060 b->slvec1b = 0; 2061 b->sMvctx = 0; 2062 2063 /* stuff for MatGetRow() */ 2064 b->rowindices = 0; 2065 b->rowvalues = 0; 2066 b->getrowactive = PETSC_FALSE; 2067 2068 /* hash table stuff */ 2069 b->ht = 0; 2070 b->hd = 0; 2071 b->ht_size = 0; 2072 b->ht_flag = PETSC_FALSE; 2073 b->ht_fact = 0; 2074 b->ht_total_ct = 0; 2075 b->ht_insert_ct = 0; 2076 2077 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 2078 b->ijonly = PETSC_FALSE; 2079 2080 b->in_loc = 0; 2081 b->v_loc = 0; 2082 b->n_loc = 0; 2083 2084 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 2085 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 2086 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 2087 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 2088 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 2089 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr); 2090 #if defined(PETSC_HAVE_ELEMENTAL) 2091 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 2092 #endif 2093 2094 B->symmetric = PETSC_TRUE; 2095 B->structurally_symmetric = PETSC_TRUE; 2096 B->symmetric_set = PETSC_TRUE; 2097 B->structurally_symmetric_set = PETSC_TRUE; 2098 2099 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 2100 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 2101 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save 
memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 2102 if (flg) { 2103 PetscReal fact = 1.39; 2104 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2105 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 2106 if (fact <= 1.0) fact = 1.39; 2107 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2108 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2109 } 2110 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2111 PetscFunctionReturn(0); 2112 } 2113 2114 /*MC 2115 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2116 2117 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 2118 and MATMPISBAIJ otherwise. 2119 2120 Options Database Keys: 2121 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2122 2123 Level: beginner 2124 2125 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 2126 M*/ 2127 2128 #undef __FUNCT__ 2129 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 2130 /*@C 2131 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2132 the user should preallocate the matrix storage by setting the parameters 2133 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2134 performance can be increased by more than a factor of 50. 2135 2136 Collective on Mat 2137 2138 Input Parameters: 2139 + B - the matrix 2140 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2141 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2142 . d_nz - number of block nonzeros per block row in diagonal portion of local 2143 submatrix (same for all local rows) 2144 . d_nnz - array containing the number of block nonzeros in the various block rows 2145 in the upper triangular and diagonal part of the diagonal portion of the local submatrix 2146 (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 2147 for the diagonal entry and set a value even if it is zero. 2148 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2149 submatrix (same for all local rows). 2150 - o_nnz - array containing the number of nonzeros in the various block rows of the 2151 off-diagonal portion of the local submatrix that is right of the diagonal 2152 (possibly different for each block row) or NULL. 2153 2154 2155 Options Database Keys: 2156 . -mat_no_unroll - uses code that does not unroll the loops in the 2157 block calculations (much slower) 2158 . -mat_block_size - size of the blocks to use 2159 2160 Notes: 2161 2162 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2163 then it must be used on all processors that share the object for that argument. 2164 2165 If the *_nnz parameter is given then the *_nz parameter is ignored 2166 2167 Storage Information: 2168 For a square global matrix we define each processor's diagonal portion 2169 to be its local rows and the corresponding columns (a square submatrix); 2170 each processor's off-diagonal portion encompasses the remainder of the 2171 local matrix (a rectangular submatrix).
2172 2173 The user can specify preallocated storage for the diagonal part of 2174 the local submatrix with either d_nz or d_nnz (not both). Set 2175 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2176 memory allocation. Likewise, specify preallocated storage for the 2177 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2178 2179 You can call MatGetInfo() to get information on how effective the preallocation was; 2180 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2181 You can also run with the option -info and look for messages with the string 2182 malloc in them to see if additional memory allocation was needed. 2183 2184 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2185 the figure below we depict these three local rows and all columns (0-11). 2186 2187 .vb 2188 0 1 2 3 4 5 6 7 8 9 10 11 2189 -------------------------- 2190 row 3 |. . . d d d o o o o o o 2191 row 4 |. . . d d d o o o o o o 2192 row 5 |. . . d d d o o o o o o 2193 -------------------------- 2194 .ve 2195 2196 Thus, any entries in the d locations are stored in the d (diagonal) 2197 submatrix, and any entries in the o locations are stored in the 2198 o (off-diagonal) submatrix. Note that the d matrix is stored in 2199 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2200 2201 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2202 plus the diagonal part of the d matrix, 2203 and o_nz should indicate the number of block nonzeros per row in the o matrix 2204 2205 In general, for PDE problems in which most nonzeros are near the diagonal, 2206 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2207 or you will get TERRIBLE performance; see the users' manual chapter on 2208 matrices. 2209 2210 Level: intermediate 2211 2212 .keywords: matrix, block, aij, compressed row, sparse, parallel 2213 2214 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 2215 @*/ 2216 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2217 { 2218 PetscErrorCode ierr; 2219 2220 PetscFunctionBegin; 2221 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2222 PetscValidType(B,1); 2223 PetscValidLogicalCollectiveInt(B,bs,2); 2224 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 2225 PetscFunctionReturn(0); 2226 } 2227 2228 #undef __FUNCT__ 2229 #define __FUNCT__ "MatCreateSBAIJ" 2230 /*@C 2231 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2232 (block compressed row). For good matrix assembly performance 2233 the user should preallocate the matrix storage by setting the parameters 2234 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2235 performance can be increased by more than a factor of 50. 2236 2237 Collective on MPI_Comm 2238 2239 Input Parameters: 2240 + comm - MPI communicator 2241 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2242 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2243 . 
m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2244 This value should be the same as the local size used in creating the 2245 y vector for the matrix-vector product y = Ax. 2246 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2247 This value should be the same as the local size used in creating the 2248 x vector for the matrix-vector product y = Ax. 2249 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2250 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2251 . d_nz - number of block nonzeros per block row in diagonal portion of local 2252 submatrix (same for all local rows) 2253 . d_nnz - array containing the number of block nonzeros in the various block rows 2254 in the upper triangular portion of the diagonal portion of the local submatrix 2255 (possibly different for each block row) or NULL. 2256 If you plan to factor the matrix you must leave room for the diagonal entry and 2257 set its value even if it is zero. 2258 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2259 submatrix (same for all local rows). 2260 - o_nnz - array containing the number of nonzeros in the various block rows of the 2261 off-diagonal portion of the local submatrix (possibly different for 2262 each block row) or NULL. 2263 2264 Output Parameter: 2265 . A - the matrix 2266 2267 Options Database Keys: 2268 . -mat_no_unroll - uses code that does not unroll the loops in the 2269 block calculations (much slower) 2270 . -mat_block_size - size of the blocks to use 2271 . -mat_mpi - use the parallel matrix data structures even on one processor 2272 (defaults to using SeqBAIJ format on one processor) 2273 2274 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2275 MatXXXXSetPreallocation() paradigm instead of this routine directly. 2276 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2277 2278 Notes: 2279 The number of rows and columns must be divisible by blocksize. 2280 This matrix type does not support complex Hermitian operations. 2281 2282 The user MUST specify either the local or global matrix dimensions 2283 (possibly both). 2284 2285 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2286 then it must be used on all processors that share the object for that argument. 2287 2288 If the *_nnz parameter is given then the *_nz parameter is ignored 2289 2290 Storage Information: 2291 For a square global matrix we define each processor's diagonal portion 2292 to be its local rows and the corresponding columns (a square submatrix); 2293 each processor's off-diagonal portion encompasses the remainder of the 2294 local matrix (a rectangular submatrix). 2295 2296 The user can specify preallocated storage for the diagonal part of 2297 the local submatrix with either d_nz or d_nnz (not both). Set 2298 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2299 memory allocation. Likewise, specify preallocated storage for the 2300 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2301 2302 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2303 the figure below we depict these three local rows and all columns (0-11). 2304 2305 .vb 2306 0 1 2 3 4 5 6 7 8 9 10 11 2307 -------------------------- 2308 row 3 |. . . d d d o o o o o o 2309 row 4 |. . . d d d o o o o o o 2310 row 5 |. . .
d d d o o o o o o 2311 -------------------------- 2312 .ve 2313 2314 Thus, any entries in the d locations are stored in the d (diagonal) 2315 submatrix, and any entries in the o locations are stored in the 2316 o (off-diagonal) submatrix. Note that the d matrix is stored in 2317 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2318 2319 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2320 plus the diagonal part of the d matrix, 2321 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2322 In general, for PDE problems in which most nonzeros are near the diagonal, 2323 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2324 or you will get TERRIBLE performance; see the users' manual chapter on 2325 matrices. 2326 2327 Level: intermediate 2328 2329 .keywords: matrix, block, aij, compressed row, sparse, parallel 2330 2331 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2332 @*/ 2333 2334 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2335 { 2336 PetscErrorCode ierr; 2337 PetscMPIInt size; 2338 2339 PetscFunctionBegin; 2340 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2341 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2342 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2343 if (size > 1) { 2344 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2345 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2346 } else { 2347 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2348 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2349 } 2350 PetscFunctionReturn(0); 2351 } 2352 2353 2354 #undef __FUNCT__ 2355 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2356 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2357 { 2358 Mat mat; 2359 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2360 PetscErrorCode ierr; 2361 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2362 PetscScalar *array; 2363 2364 PetscFunctionBegin; 2365 *newmat = 0; 2366 2367 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2368 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2369 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2370 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2371 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2372 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2373 2374 mat->factortype = matin->factortype; 2375 mat->preallocated = PETSC_TRUE; 2376 mat->assembled = PETSC_TRUE; 2377 mat->insertmode = NOT_SET_VALUES; 2378 2379 a = (Mat_MPISBAIJ*)mat->data; 2380 a->bs2 = oldmat->bs2; 2381 a->mbs = oldmat->mbs; 2382 a->nbs = oldmat->nbs; 2383 a->Mbs = oldmat->Mbs; 2384 a->Nbs = oldmat->Nbs; 2385 2386 2387 a->size = oldmat->size; 2388 a->rank = oldmat->rank; 2389 a->donotstash = oldmat->donotstash; 2390 a->roworiented = oldmat->roworiented; 2391 a->rowindices = 0; 2392 a->rowvalues = 0; 2393 a->getrowactive = PETSC_FALSE; 2394 a->barray = 0; 2395 a->rstartbs = oldmat->rstartbs; 2396 a->rendbs = oldmat->rendbs; 2397 a->cstartbs = oldmat->cstartbs; 2398 a->cendbs = oldmat->cendbs; 2399 2400 /* hash table stuff */ 2401 a->ht = 0; 2402 a->hd = 0; 2403 a->ht_size = 0; 2404 a->ht_flag = oldmat->ht_flag; 2405 a->ht_fact 
= oldmat->ht_fact; 2406 a->ht_total_ct = 0; 2407 a->ht_insert_ct = 0; 2408 2409 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2410 if (oldmat->colmap) { 2411 #if defined(PETSC_USE_CTABLE) 2412 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2413 #else 2414 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 2415 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2416 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2417 #endif 2418 } else a->colmap = 0; 2419 2420 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2421 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2422 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2423 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2424 } else a->garray = 0; 2425 2426 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2427 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2428 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2429 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2430 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2431 2432 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2433 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2434 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2435 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2436 2437 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2438 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2439 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2440 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2441 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2442 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2443 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2444 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2445 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2446 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2447 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 2448 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 2449 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2450 2451 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2452 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2453 a->sMvctx = oldmat->sMvctx; 2454 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 2455 2456 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2457 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2458 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2459 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2460 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2461 *newmat = mat; 2462 PetscFunctionReturn(0); 2463 } 2464 2465 #undef __FUNCT__ 2466 #define 
__FUNCT__ "MatLoad_MPISBAIJ" 2467 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2468 { 2469 PetscErrorCode ierr; 2470 PetscInt i,nz,j,rstart,rend; 2471 PetscScalar *vals,*buf; 2472 MPI_Comm comm; 2473 MPI_Status status; 2474 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2475 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2476 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2477 PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows; 2478 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2479 PetscInt dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols; 2480 int fd; 2481 2482 PetscFunctionBegin; 2483 /* force binary viewer to load .info file if it has not yet done so */ 2484 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2485 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2486 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2487 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2488 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2489 if (bs < 0) bs = 1; 2490 2491 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2492 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2493 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2494 if (!rank) { 2495 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2496 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2497 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2498 } 2499 2500 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 2501 2502 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2503 M = header[1]; 2504 N = header[2]; 2505 2506 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2507 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 2508 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 2509 2510 /* If global sizes are set, check if they are consistent with that given in the file */ 2511 if (sizesset) { 2512 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 2513 } 2514 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2515 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2516 2517 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2518 2519 /* 2520 This code adds extra rows to make sure the number of rows is 2521 divisible by the blocksize 2522 */ 2523 Mbs = M/bs; 2524 extra_rows = bs - M + bs*(Mbs); 2525 if (extra_rows == bs) extra_rows = 0; 2526 else Mbs++; 2527 if (extra_rows &&!rank) { 2528 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2529 } 2530 2531 /* determine ownership of all rows */ 2532 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2533 mbs = Mbs/size + ((Mbs % size) > rank); 2534 m = mbs*bs; 2535 } else { /* User Set */ 2536 m = newmat->rmap->n; 2537 mbs = m/bs; 2538 } 2539 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 2540 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2541 
ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2542 rowners[0] = 0; 2543 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2544 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2545 rstart = rowners[rank]; 2546 rend = rowners[rank+1]; 2547 2548 /* distribute row lengths to all processors */ 2549 ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 2550 if (!rank) { 2551 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 2552 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2553 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2554 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 2555 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2556 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2557 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2558 } else { 2559 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2560 } 2561 2562 if (!rank) { /* procs[0] */ 2563 /* calculate the number of nonzeros on each processor */ 2564 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 2565 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2566 for (i=0; i<size; i++) { 2567 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2568 procsnz[i] += rowlengths[j]; 2569 } 2570 } 2571 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2572 2573 /* determine max buffer needed and allocate it */ 2574 maxnz = 0; 2575 for (i=0; i<size; i++) { 2576 maxnz = PetscMax(maxnz,procsnz[i]); 2577 } 2578 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2579 2580 /* read in my part of the matrix column indices */ 2581 nz = procsnz[0]; 2582 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2583 mycols = ibuf; 2584 if (size == 1) nz -= extra_rows; 2585 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2586 if (size == 1) { 2587 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 2588 } 2589 2590 /* read in every ones (except the last) and ship off */ 2591 for (i=1; i<size-1; i++) { 2592 nz = procsnz[i]; 2593 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2594 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2595 } 2596 /* read in the stuff for the last proc */ 2597 if (size != 1) { 2598 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2599 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2600 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2601 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2602 } 2603 ierr = PetscFree(cols);CHKERRQ(ierr); 2604 } else { /* procs[i], i>0 */ 2605 /* determine buffer space needed for message */ 2606 nz = 0; 2607 for (i=0; i<m; i++) nz += locrowlens[i]; 2608 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2609 mycols = ibuf; 2610 /* receive message of column indices*/ 2611 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2612 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2613 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2614 } 2615 2616 /* loop over local rows, determining number of off diagonal entries */ 2617 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 2618 ierr = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 2619 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2620 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2621 ierr = 
PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2622 rowcount = 0; 2623 nzcount = 0; 2624 for (i=0; i<mbs; i++) { 2625 dcount = 0; 2626 odcount = 0; 2627 for (j=0; j<bs; j++) { 2628 kmax = locrowlens[rowcount]; 2629 for (k=0; k<kmax; k++) { 2630 tmp = mycols[nzcount++]/bs; /* block col. index */ 2631 if (!mask[tmp]) { 2632 mask[tmp] = 1; 2633 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2634 else masked1[dcount++] = tmp; /* entry in diag portion */ 2635 } 2636 } 2637 rowcount++; 2638 } 2639 2640 dlens[i] = dcount; /* d_nzz[i] */ 2641 odlens[i] = odcount; /* o_nzz[i] */ 2642 2643 /* zero out the mask elements we set */ 2644 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2645 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2646 } 2647 if (!sizesset) { 2648 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2649 } 2650 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2651 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2652 2653 if (!rank) { 2654 ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 2655 /* read in my part of the matrix numerical values */ 2656 nz = procsnz[0]; 2657 vals = buf; 2658 mycols = ibuf; 2659 if (size == 1) nz -= extra_rows; 2660 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2661 if (size == 1) { 2662 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 2663 } 2664 2665 /* insert into matrix */ 2666 jj = rstart*bs; 2667 for (i=0; i<m; i++) { 2668 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2669 mycols += locrowlens[i]; 2670 vals += locrowlens[i]; 2671 jj++; 2672 } 2673 2674 /* read in other processors (except the last one) and ship out */ 2675 for (i=1; i<size-1; i++) { 2676 nz = procsnz[i]; 2677 vals = buf; 2678 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2679 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2680 } 2681 /* the last proc */ 2682 if (size != 1) { 2683 nz = procsnz[i] - extra_rows; 2684 vals = buf; 2685 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2686 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2687 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2688 } 2689 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2690 2691 } else { 2692 /* receive numeric values */ 2693 ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 2694 2695 /* receive message of values*/ 2696 vals = buf; 2697 mycols = ibuf; 2698 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2699 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2700 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2701 2702 /* insert into matrix */ 2703 jj = rstart*bs; 2704 for (i=0; i<m; i++) { 2705 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2706 mycols += locrowlens[i]; 2707 vals += locrowlens[i]; 2708 jj++; 2709 } 2710 } 2711 2712 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2713 ierr = PetscFree(buf);CHKERRQ(ierr); 2714 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2715 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2716 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2717 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2718 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2719 ierr = 
MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2720 PetscFunctionReturn(0); 2721 } 2722 2723 #undef __FUNCT__ 2724 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2725 /*XXXXX@ 2726 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2727 2728 Input Parameters: 2729 . mat - the matrix 2730 . fact - factor 2731 2732 Not Collective on Mat, each process can have a different hash factor 2733 2734 Level: advanced 2735 2736 Notes: 2737 This can also be set by the command line option: -mat_use_hash_table fact 2738 2739 .keywords: matrix, hashtable, factor, HT 2740 2741 .seealso: MatSetOption() 2742 @XXXXX*/ 2743 2744 2745 #undef __FUNCT__ 2746 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2747 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2748 { 2749 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2750 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2751 PetscReal atmp; 2752 PetscReal *work,*svalues,*rvalues; 2753 PetscErrorCode ierr; 2754 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2755 PetscMPIInt rank,size; 2756 PetscInt *rowners_bs,dest,count,source; 2757 PetscScalar *va; 2758 MatScalar *ba; 2759 MPI_Status stat; 2760 2761 PetscFunctionBegin; 2762 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2763 ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 2764 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2765 2766 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2767 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2768 2769 bs = A->rmap->bs; 2770 mbs = a->mbs; 2771 Mbs = a->Mbs; 2772 ba = b->a; 2773 bi = b->i; 2774 bj = b->j; 2775 2776 /* find ownerships */ 2777 rowners_bs = A->rmap->range; 2778 2779 /* each proc creates an array to be distributed */ 2780 ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr); 2781 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2782 2783 /* row_max for B */ 2784 if (rank != size-1) { 2785 for (i=0; i<mbs; i++) { 2786 ncols = bi[1] - bi[0]; bi++; 2787 brow = bs*i; 2788 for (j=0; j<ncols; j++) { 2789 bcol = bs*(*bj); 2790 for (kcol=0; kcol<bs; kcol++) { 2791 col = bcol + kcol; /* local col index */ 2792 col += rowners_bs[rank+1]; /* global col index */ 2793 for (krow=0; krow<bs; krow++) { 2794 atmp = PetscAbsScalar(*ba); ba++; 2795 row = brow + krow; /* local row index */ 2796 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2797 if (work[col] < atmp) work[col] = atmp; 2798 } 2799 } 2800 bj++; 2801 } 2802 } 2803 2804 /* send values to its owners */ 2805 for (dest=rank+1; dest<size; dest++) { 2806 svalues = work + rowners_bs[dest]; 2807 count = rowners_bs[dest+1]-rowners_bs[dest]; 2808 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2809 } 2810 } 2811 2812 /* receive values */ 2813 if (rank) { 2814 rvalues = work; 2815 count = rowners_bs[rank+1]-rowners_bs[rank]; 2816 for (source=0; source<rank; source++) { 2817 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 2818 /* process values */ 2819 for (i=0; i<count; i++) { 2820 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2821 } 2822 } 2823 } 2824 2825 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2826 ierr = PetscFree(work);CHKERRQ(ierr); 2827 PetscFunctionReturn(0); 2828 } 2829 2830 #undef __FUNCT__ 2831 #define __FUNCT__ "MatSOR_MPISBAIJ" 2832 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec 
bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2833 { 2834 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2835 PetscErrorCode ierr; 2836 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2837 PetscScalar *x,*ptr,*from; 2838 Vec bb1; 2839 const PetscScalar *b; 2840 2841 PetscFunctionBegin; 2842 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2843 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2844 2845 if (flag == SOR_APPLY_UPPER) { 2846 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2847 PetscFunctionReturn(0); 2848 } 2849 2850 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2851 if (flag & SOR_ZERO_INITIAL_GUESS) { 2852 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2853 its--; 2854 } 2855 2856 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2857 while (its--) { 2858 2859 /* lower triangular part: slvec0b = - B^T*xx */ 2860 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2861 2862 /* copy xx into slvec0a */ 2863 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2864 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2865 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2866 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2867 2868 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2869 2870 /* copy bb into slvec1a */ 2871 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2872 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2873 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2874 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2875 2876 /* set slvec1b = 0 */ 2877 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2878 2879 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2880 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2881 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2882 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2883 2884 /* upper triangular part: bb1 = bb1 - B*x */ 2885 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2886 2887 /* local diagonal sweep */ 2888 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2889 } 2890 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2891 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2892 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2893 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2894 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2895 } else if (flag & SOR_EISENSTAT) { 2896 Vec xx1; 2897 PetscBool hasop; 2898 const PetscScalar *diag; 2899 PetscScalar *sl,scale = (omega - 2.0)/omega; 2900 PetscInt i,n; 2901 2902 if (!mat->xx1) { 2903 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2904 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2905 } 2906 xx1 = mat->xx1; 2907 bb1 = mat->bb1; 2908 2909 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2910 2911 if (!mat->diag) { 2912 /* this is wrong for same matrix with new nonzero values */ 2913 ierr = 
MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 2914 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2915 } 2916 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2917 2918 if (hasop) { 2919 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2920 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2921 } else { 2922 /* 2923 These two lines are replaced by code that may be a bit faster for a good compiler 2924 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2925 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2926 */ 2927 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2928 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2929 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2930 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2931 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2932 if (omega == 1.0) { 2933 for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 2934 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2935 } else { 2936 for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 2937 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2938 } 2939 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2940 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2941 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2942 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2943 } 2944 2945 /* multiply off-diagonal portion of matrix */ 2946 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2947 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2948 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2949 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2950 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2951 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2952 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2953 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2954 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2955 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2956 2957 /* local sweep */ 2958 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2959 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2960 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2961 PetscFunctionReturn(0); 2962 } 2963 2964 #undef __FUNCT__ 2965 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2966 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2967 { 2968 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2969 PetscErrorCode ierr; 2970 Vec lvec1,bb1; 2971 2972 PetscFunctionBegin; 2973 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2974 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2975 2976 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2977 if (flag & SOR_ZERO_INITIAL_GUESS) { 2978 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2979 its--; 2980 } 2981 2982 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2983 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2984 while (its--) { 2985 ierr = 
VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2986 2987 /* lower triangular part: bb1 = bb - B^T*xx */ 2988 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2989 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2990 2991 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2992 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2993 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2994 2995 /* upper triangular part: bb1 = bb1 - B*x */ 2996 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2997 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2998 2999 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3000 3001 /* diagonal sweep */ 3002 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 3003 } 3004 ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 3005 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 3006 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 3007 PetscFunctionReturn(0); 3008 } 3009 3010 #undef __FUNCT__ 3011 #define __FUNCT__ "MatCreateMPISBAIJWithArrays" 3012 /*@ 3013 MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows in standard 3014 CSR format. 3015 3016 Collective on MPI_Comm 3017 3018 Input Parameters: 3019 + comm - MPI communicator 3020 . bs - the block size, only a block size of 1 is supported 3021 . m - number of local rows (Cannot be PETSC_DECIDE) 3022 . n - This value should be the same as the local size used in creating the 3023 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3024 calculated if N is given) For square matrices n is almost always m. 3025 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3026 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3027 . i - row indices 3028 . j - column indices 3029 - a - matrix values 3030 3031 Output Parameter: 3032 . mat - the matrix 3033 3034 Level: intermediate 3035 3036 Notes: 3037 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3038 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3039 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3040 3041 The i and j indices are 0 based, and the entries of i are offsets into the local j array.
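   As a small illustrative sketch (not a tested PETSc example; the matrix, sizes and values below are made up), consider a
   4x4 tridiagonal matrix with bs = 1 assembled on exactly two processes, where each process lists only the
   upper-triangular part of its two local rows in CSR form:

.vb
     Mat            C;
     PetscMPIInt    rank;
     PetscErrorCode ierr;
     PetscInt       i0[] = {0,2,4}, j0[] = {0,1,1,2};
     PetscInt       i1[] = {0,2,3}, j1[] = {2,3,3};
     PetscScalar    a0[] = {2.0,-1.0,2.0,-1.0};
     PetscScalar    a1[] = {2.0,-1.0,2.0};

     ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     if (!rank) {
       ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i0,j0,a0,&C);CHKERRQ(ierr);
     } else {
       ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i1,j1,a1,&C);CHKERRQ(ierr);
     }
.ve

   Here rank 0 owns global rows 0-1 and rank 1 owns global rows 2-3; the arrays describe only the entries on and
   above the diagonal of each local row.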
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
/*@C
   MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
   (symmetric block compressed row storage).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, sbaij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatMPISBAIJSetPreallocation(), MatCreateAIJ(), MATMPISBAIJ
@*/
PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

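/*
   For reference, the convenience routine MatCreateMPISBAIJWithArrays() above is built
   from exactly these pieces; a minimal sketch of calling them directly (assuming the
   same comm, bs, m, n, i, j, a arguments as in the previous example):

      ierr = MatCreate(comm,&S);CHKERRQ(ierr);
      ierr = MatSetSizes(S,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(S,MATMPISBAIJ);CHKERRQ(ierr);
      ierr = MatMPISBAIJSetPreallocationCSR(S,bs,i,j,a);CHKERRQ(ierr);
*/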
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data;
    PetscInt     *dnz,*onz,sum,bs,cbs,mbs,Nbs;
    PetscInt     *bindx,rmax=a->rmax,j;

    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    mbs = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
    }
    /* Check sum(n) = Nbs */
    ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns != global block columns %D",Nbs);

    ierr    = MPI_Scan(&mbs,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= mbs;

    ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
    ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<mbs; i++) {
      ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
      nnz  = nnz/bs;
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
    }
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscFree(bindx);CHKERRQ(ierr);

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);

  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
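/*
   My understanding is that the routine above is the MPISBAIJ implementation reached
   through the generic MatCreateMPIMatConcatenateSeqMat() dispatch.  A hedged usage
   sketch (seqmat is a sequential SBAIJ matrix each rank has already assembled; the
   variable names are illustrative only):

      Mat seqmat,mpimat;
      ...
      ierr = MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
*/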