1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 #if defined(PETSC_HAVE_ELEMENTAL) 8 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 9 #endif 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 12 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 13 { 14 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 15 PetscErrorCode ierr; 16 17 PetscFunctionBegin; 18 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 19 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 20 PetscFunctionReturn(0); 21 } 22 23 #undef __FUNCT__ 24 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 25 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 26 { 27 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 28 PetscErrorCode ierr; 29 30 PetscFunctionBegin; 31 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 32 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 33 PetscFunctionReturn(0); 34 } 35 36 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 37 { \ 38 \ 39 brow = row/bs; \ 40 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 41 rmax = aimax[brow]; nrow = ailen[brow]; \ 42 bcol = col/bs; \ 43 ridx = row % bs; cidx = col % bs; \ 44 low = 0; high = nrow; \ 45 while (high-low > 3) { \ 46 t = (low+high)/2; \ 47 if (rp[t] > bcol) high = t; \ 48 else low = t; \ 49 } \ 50 for (_i=low; _i<high; _i++) { \ 51 if (rp[_i] > bcol) break; \ 52 if (rp[_i] == bcol) { \ 53 bap = ap + bs2*_i + bs*cidx + ridx; \ 54 if (addv == ADD_VALUES) *bap += value; \ 55 else *bap = value; \ 56 goto a_noinsert; \ 57 } \ 58 } \ 59 if (a->nonew == 1) goto a_noinsert; \ 60 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 61 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 62 N = 
nrow++ - 1; \ 63 /* shift up all the later entries in this row */ \ 64 for (ii=N; ii>=_i; ii--) { \ 65 rp[ii+1] = rp[ii]; \ 66 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 67 } \ 68 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 69 rp[_i] = bcol; \ 70 ap[bs2*_i + bs*cidx + ridx] = value; \ 71 A->nonzerostate++;\ 72 a_noinsert:; \ 73 ailen[brow] = nrow; \ 74 } 75 76 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 77 { \ 78 brow = row/bs; \ 79 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 80 rmax = bimax[brow]; nrow = bilen[brow]; \ 81 bcol = col/bs; \ 82 ridx = row % bs; cidx = col % bs; \ 83 low = 0; high = nrow; \ 84 while (high-low > 3) { \ 85 t = (low+high)/2; \ 86 if (rp[t] > bcol) high = t; \ 87 else low = t; \ 88 } \ 89 for (_i=low; _i<high; _i++) { \ 90 if (rp[_i] > bcol) break; \ 91 if (rp[_i] == bcol) { \ 92 bap = ap + bs2*_i + bs*cidx + ridx; \ 93 if (addv == ADD_VALUES) *bap += value; \ 94 else *bap = value; \ 95 goto b_noinsert; \ 96 } \ 97 } \ 98 if (b->nonew == 1) goto b_noinsert; \ 99 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 100 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 101 N = nrow++ - 1; \ 102 /* shift up all the later entries in this row */ \ 103 for (ii=N; ii>=_i; ii--) { \ 104 rp[ii+1] = rp[ii]; \ 105 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 106 } \ 107 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 108 rp[_i] = bcol; \ 109 ap[bs2*_i + bs*cidx + ridx] = value; \ 110 B->nonzerostate++;\ 111 b_noinsert:; \ 112 bilen[brow] = nrow; \ 113 } 114 115 /* Only add/insert a(i,j) with i<=j (blocks). 116 Any a(i,j) with i>j input by user is ingored. 
117 */ 118 #undef __FUNCT__ 119 #define __FUNCT__ "MatSetValues_MPISBAIJ" 120 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 121 { 122 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 123 MatScalar value; 124 PetscBool roworiented = baij->roworiented; 125 PetscErrorCode ierr; 126 PetscInt i,j,row,col; 127 PetscInt rstart_orig=mat->rmap->rstart; 128 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 129 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 130 131 /* Some Variables required in the macro */ 132 Mat A = baij->A; 133 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 134 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 135 MatScalar *aa =a->a; 136 137 Mat B = baij->B; 138 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 139 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 140 MatScalar *ba =b->a; 141 142 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 143 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 144 MatScalar *ap,*bap; 145 146 /* for stash */ 147 PetscInt n_loc, *in_loc = NULL; 148 MatScalar *v_loc = NULL; 149 150 PetscFunctionBegin; 151 if (!baij->donotstash) { 152 if (n > baij->n_loc) { 153 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 154 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 155 ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr); 156 ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr); 157 158 baij->n_loc = n; 159 } 160 in_loc = baij->in_loc; 161 v_loc = baij->v_loc; 162 } 163 164 for (i=0; i<m; i++) { 165 if (im[i] < 0) continue; 166 #if defined(PETSC_USE_DEBUG) 167 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 168 #endif 169 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 170 row = im[i] - rstart_orig; /* local row index */ 171 for (j=0; j<n; j++) { 172 if (im[i]/bs > in[j]/bs) { 173 if (a->ignore_ltriangular) { 174 
continue; /* ignore lower triangular blocks */ 175 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 176 } 177 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 178 col = in[j] - cstart_orig; /* local col index */ 179 brow = row/bs; bcol = col/bs; 180 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 181 if (roworiented) value = v[i*n+j]; 182 else value = v[i+j*m]; 183 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 184 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 185 } else if (in[j] < 0) continue; 186 #if defined(PETSC_USE_DEBUG) 187 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 188 #endif 189 else { /* off-diag entry (B) */ 190 if (mat->was_assembled) { 191 if (!baij->colmap) { 192 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 193 } 194 #if defined(PETSC_USE_CTABLE) 195 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 196 col = col - 1; 197 #else 198 col = baij->colmap[in[j]/bs] - 1; 199 #endif 200 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 201 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 202 col = in[j]; 203 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 204 B = baij->B; 205 b = (Mat_SeqBAIJ*)(B)->data; 206 bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 207 ba = b->a; 208 } else col += in[j]%bs; 209 } else col = in[j]; 210 if (roworiented) value = v[i*n+j]; 211 else value = v[i+j*m]; 212 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 213 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 214 } 215 } 216 } else { /* off processor entry */ 217 if 
(mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 218 if (!baij->donotstash) { 219 mat->assembled = PETSC_FALSE; 220 n_loc = 0; 221 for (j=0; j<n; j++) { 222 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 223 in_loc[n_loc] = in[j]; 224 if (roworiented) { 225 v_loc[n_loc] = v[i*n+j]; 226 } else { 227 v_loc[n_loc] = v[j*m+i]; 228 } 229 n_loc++; 230 } 231 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ_Inlined" 240 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 241 { 242 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 243 PetscErrorCode ierr; 244 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 245 PetscInt *imax =a->imax,*ai=a->i,*ailen=a->ilen; 246 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 247 PetscBool roworiented=a->roworiented; 248 const PetscScalar *value = v; 249 MatScalar *ap,*aa = a->a,*bap; 250 251 PetscFunctionBegin; 252 if (col < row) { 253 if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */ 254 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 255 } 256 rp = aj + ai[row]; 257 ap = aa + bs2*ai[row]; 258 rmax = imax[row]; 259 nrow = ailen[row]; 260 value = v; 261 low = 0; 262 high = nrow; 263 264 while (high-low > 7) { 265 t = (low+high)/2; 266 if (rp[t] > col) high = t; 267 else low = t; 268 } 269 for (i=low; i<high; i++) { 270 if (rp[i] > col) break; 271 if (rp[i] == col) { 272 bap = ap + bs2*i; 273 if (roworiented) { 274 if (is == ADD_VALUES) { 275 for (ii=0; ii<bs; ii++) { 276 for (jj=ii; jj<bs2; jj+=bs) { 277 bap[jj] += *value++; 278 } 279 } 280 } else { 281 for (ii=0; ii<bs; ii++) { 282 for (jj=ii; jj<bs2; jj+=bs) { 283 bap[jj] = *value++; 284 } 285 } 286 } 287 } else { 288 if (is == ADD_VALUES) { 289 for (ii=0; ii<bs; ii++) { 290 for (jj=0; jj<bs; jj++) { 291 *bap++ += *value++; 292 } 293 } 294 } else { 295 for (ii=0; ii<bs; ii++) { 296 for (jj=0; jj<bs; jj++) { 297 *bap++ = *value++; 298 } 299 } 300 } 301 } 302 goto noinsert2; 303 } 304 } 305 if (nonew == 1) goto noinsert2; 306 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol); 307 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 308 N = nrow++ - 1; high++; 309 /* shift up all the later entries in this row */ 310 for (ii=N; ii>=i; ii--) { 311 rp[ii+1] = rp[ii]; 312 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 313 } 314 if (N >= i) { 315 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 316 } 317 rp[i] = col; 318 bap = ap + bs2*i; 319 if (roworiented) { 320 for (ii=0; ii<bs; ii++) { 321 for (jj=ii; jj<bs2; jj+=bs) { 322 bap[jj] = *value++; 323 } 324 } 325 } else { 326 for (ii=0; ii<bs; ii++) { 327 for (jj=0; jj<bs; jj++) { 328 *bap++ = *value++; 329 } 330 } 331 } 332 noinsert2:; 333 ailen[row] = nrow; 334 PetscFunctionReturn(0); 
335 } 336 337 #undef __FUNCT__ 338 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined" 339 /* 340 This routine is exactly duplicated in mpibaij.c 341 */ 342 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 343 { 344 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 345 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 346 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 347 PetscErrorCode ierr; 348 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 349 PetscBool roworiented=a->roworiented; 350 const PetscScalar *value = v; 351 MatScalar *ap,*aa = a->a,*bap; 352 353 PetscFunctionBegin; 354 rp = aj + ai[row]; 355 ap = aa + bs2*ai[row]; 356 rmax = imax[row]; 357 nrow = ailen[row]; 358 low = 0; 359 high = nrow; 360 value = v; 361 while (high-low > 7) { 362 t = (low+high)/2; 363 if (rp[t] > col) high = t; 364 else low = t; 365 } 366 for (i=low; i<high; i++) { 367 if (rp[i] > col) break; 368 if (rp[i] == col) { 369 bap = ap + bs2*i; 370 if (roworiented) { 371 if (is == ADD_VALUES) { 372 for (ii=0; ii<bs; ii++) { 373 for (jj=ii; jj<bs2; jj+=bs) { 374 bap[jj] += *value++; 375 } 376 } 377 } else { 378 for (ii=0; ii<bs; ii++) { 379 for (jj=ii; jj<bs2; jj+=bs) { 380 bap[jj] = *value++; 381 } 382 } 383 } 384 } else { 385 if (is == ADD_VALUES) { 386 for (ii=0; ii<bs; ii++,value+=bs) { 387 for (jj=0; jj<bs; jj++) { 388 bap[jj] += value[jj]; 389 } 390 bap += bs; 391 } 392 } else { 393 for (ii=0; ii<bs; ii++,value+=bs) { 394 for (jj=0; jj<bs; jj++) { 395 bap[jj] = value[jj]; 396 } 397 bap += bs; 398 } 399 } 400 } 401 goto noinsert2; 402 } 403 } 404 if (nonew == 1) goto noinsert2; 405 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol); 406 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 407 N = nrow++ - 1; high++; 408 
/* shift up all the later entries in this row */ 409 for (ii=N; ii>=i; ii--) { 410 rp[ii+1] = rp[ii]; 411 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 412 } 413 if (N >= i) { 414 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 415 } 416 rp[i] = col; 417 bap = ap + bs2*i; 418 if (roworiented) { 419 for (ii=0; ii<bs; ii++) { 420 for (jj=ii; jj<bs2; jj+=bs) { 421 bap[jj] = *value++; 422 } 423 } 424 } else { 425 for (ii=0; ii<bs; ii++) { 426 for (jj=0; jj<bs; jj++) { 427 *bap++ = *value++; 428 } 429 } 430 } 431 noinsert2:; 432 ailen[row] = nrow; 433 PetscFunctionReturn(0); 434 } 435 436 #undef __FUNCT__ 437 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 438 /* 439 This routine could be optimized by removing the need for the block copy below and passing stride information 440 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 441 */ 442 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 443 { 444 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 445 const MatScalar *value; 446 MatScalar *barray =baij->barray; 447 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 448 PetscErrorCode ierr; 449 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 450 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 451 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 452 453 PetscFunctionBegin; 454 if (!barray) { 455 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 456 baij->barray = barray; 457 } 458 459 if (roworiented) { 460 stepval = (n-1)*bs; 461 } else { 462 stepval = (m-1)*bs; 463 } 464 for (i=0; i<m; i++) { 465 if (im[i] < 0) continue; 466 #if defined(PETSC_USE_DEBUG) 467 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1); 468 #endif 469 if (im[i] 
>= rstart && im[i] < rend) { 470 row = im[i] - rstart; 471 for (j=0; j<n; j++) { 472 if (im[i] > in[j]) { 473 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 474 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 475 } 476 /* If NumCol = 1 then a copy is not required */ 477 if ((roworiented) && (n == 1)) { 478 barray = (MatScalar*) v + i*bs2; 479 } else if ((!roworiented) && (m == 1)) { 480 barray = (MatScalar*) v + j*bs2; 481 } else { /* Here a copy is required */ 482 if (roworiented) { 483 value = v + i*(stepval+bs)*bs + j*bs; 484 } else { 485 value = v + j*(stepval+bs)*bs + i*bs; 486 } 487 for (ii=0; ii<bs; ii++,value+=stepval) { 488 for (jj=0; jj<bs; jj++) { 489 *barray++ = *value++; 490 } 491 } 492 barray -=bs2; 493 } 494 495 if (in[j] >= cstart && in[j] < cend) { 496 col = in[j] - cstart; 497 ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 498 } else if (in[j] < 0) continue; 499 #if defined(PETSC_USE_DEBUG) 500 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1); 501 #endif 502 else { 503 if (mat->was_assembled) { 504 if (!baij->colmap) { 505 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 506 } 507 508 #if defined(PETSC_USE_DEBUG) 509 #if defined(PETSC_USE_CTABLE) 510 { PetscInt data; 511 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 512 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 513 } 514 #else 515 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 516 #endif 517 #endif 518 #if defined(PETSC_USE_CTABLE) 519 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 520 col = (col - 1)/bs; 521 #else 522 col 
= (baij->colmap[in[j]] - 1)/bs; 523 #endif 524 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 525 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 526 col = in[j]; 527 } 528 } else col = in[j]; 529 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 530 } 531 } 532 } else { 533 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 534 if (!baij->donotstash) { 535 if (roworiented) { 536 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 537 } else { 538 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 539 } 540 } 541 } 542 } 543 PetscFunctionReturn(0); 544 } 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatGetValues_MPISBAIJ" 548 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 549 { 550 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 551 PetscErrorCode ierr; 552 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 553 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 554 555 PetscFunctionBegin; 556 for (i=0; i<m; i++) { 557 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 558 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 559 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 560 row = idxm[i] - bsrstart; 561 for (j=0; j<n; j++) { 562 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 563 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 564 if (idxn[j] >= bscstart 
&& idxn[j] < bscend) { 565 col = idxn[j] - bscstart; 566 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 567 } else { 568 if (!baij->colmap) { 569 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 570 } 571 #if defined(PETSC_USE_CTABLE) 572 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 573 data--; 574 #else 575 data = baij->colmap[idxn[j]/bs]-1; 576 #endif 577 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 578 else { 579 col = data + idxn[j]%bs; 580 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 581 } 582 } 583 } 584 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 585 } 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatNorm_MPISBAIJ" 591 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 592 { 593 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 594 PetscErrorCode ierr; 595 PetscReal sum[2],*lnorm2; 596 597 PetscFunctionBegin; 598 if (baij->size == 1) { 599 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 600 } else { 601 if (type == NORM_FROBENIUS) { 602 ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr); 603 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 604 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 605 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 606 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 607 ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 608 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 609 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 610 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 611 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 612 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 613 PetscReal *rsum,*rsum2,vabs; 614 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 615 PetscInt 
brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 616 MatScalar *v; 617 618 ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr); 619 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 620 /* Amat */ 621 v = amat->a; jj = amat->j; 622 for (brow=0; brow<mbs; brow++) { 623 grow = bs*(rstart + brow); 624 nz = amat->i[brow+1] - amat->i[brow]; 625 for (bcol=0; bcol<nz; bcol++) { 626 gcol = bs*(rstart + *jj); jj++; 627 for (col=0; col<bs; col++) { 628 for (row=0; row<bs; row++) { 629 vabs = PetscAbsScalar(*v); v++; 630 rsum[gcol+col] += vabs; 631 /* non-diagonal block */ 632 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 633 } 634 } 635 } 636 } 637 /* Bmat */ 638 v = bmat->a; jj = bmat->j; 639 for (brow=0; brow<mbs; brow++) { 640 grow = bs*(rstart + brow); 641 nz = bmat->i[brow+1] - bmat->i[brow]; 642 for (bcol=0; bcol<nz; bcol++) { 643 gcol = bs*garray[*jj]; jj++; 644 for (col=0; col<bs; col++) { 645 for (row=0; row<bs; row++) { 646 vabs = PetscAbsScalar(*v); v++; 647 rsum[gcol+col] += vabs; 648 rsum[grow+row] += vabs; 649 } 650 } 651 } 652 } 653 ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 654 *norm = 0.0; 655 for (col=0; col<mat->cmap->N; col++) { 656 if (rsum2[col] > *norm) *norm = rsum2[col]; 657 } 658 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 659 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 660 } 661 PetscFunctionReturn(0); 662 } 663 664 #undef __FUNCT__ 665 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 666 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 667 { 668 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 669 PetscErrorCode ierr; 670 PetscInt nstash,reallocs; 671 InsertMode addv; 672 673 PetscFunctionBegin; 674 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 675 676 /* make sure all processors are either in INSERTMODE or ADDMODE */ 677 ierr = 
MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 678 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 679 mat->insertmode = addv; /* in case this processor had no cache */ 680 681 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 682 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 683 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 684 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 685 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 686 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 687 PetscFunctionReturn(0); 688 } 689 690 #undef __FUNCT__ 691 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 692 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 693 { 694 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 695 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data; 696 PetscErrorCode ierr; 697 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 698 PetscInt *row,*col; 699 PetscBool other_disassembled; 700 PetscMPIInt n; 701 PetscBool r1,r2,r3; 702 MatScalar *val; 703 InsertMode addv = mat->insertmode; 704 705 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 706 PetscFunctionBegin; 707 if (!baij->donotstash && !mat->nooffprocentries) { 708 while (1) { 709 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 710 if (!flg) break; 711 712 for (i=0; i<n;) { 713 /* Now identify the consecutive vals belonging to the same row */ 714 for (j=i,rstart=row[j]; j<n; j++) { 715 if (row[j] != rstart) break; 716 } 717 if (j < n) ncols = j-i; 718 else ncols = n-i; 719 /* Now assemble all these values with a single 
function call */ 720 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 721 i = j; 722 } 723 } 724 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 725 /* Now process the block-stash. Since the values are stashed column-oriented, 726 set the roworiented flag to column oriented, and after MatSetValues() 727 restore the original flags */ 728 r1 = baij->roworiented; 729 r2 = a->roworiented; 730 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 731 732 baij->roworiented = PETSC_FALSE; 733 a->roworiented = PETSC_FALSE; 734 735 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 736 while (1) { 737 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 738 if (!flg) break; 739 740 for (i=0; i<n;) { 741 /* Now identify the consecutive vals belonging to the same row */ 742 for (j=i,rstart=row[j]; j<n; j++) { 743 if (row[j] != rstart) break; 744 } 745 if (j < n) ncols = j-i; 746 else ncols = n-i; 747 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 748 i = j; 749 } 750 } 751 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 752 753 baij->roworiented = r1; 754 a->roworiented = r2; 755 756 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 757 } 758 759 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 760 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 761 762 /* determine if any processor has disassembled, if so we must 763 also disassemble ourselfs, in order that we may reassemble. 
*/ 764 /* 765 if nonzero structure of submatrix B cannot change then we know that 766 no processor disassembled thus we can skip this stuff 767 */ 768 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 769 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 770 if (mat->was_assembled && !other_disassembled) { 771 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 772 } 773 } 774 775 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 776 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 777 } 778 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 779 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 780 781 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 782 783 baij->rowvalues = 0; 784 785 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 786 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 787 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 788 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 789 } 790 PetscFunctionReturn(0); 791 } 792 793 extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer); 794 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 795 #include <petscdraw.h> 796 #undef __FUNCT__ 797 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 798 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 799 { 800 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 801 PetscErrorCode ierr; 802 PetscInt bs = mat->rmap->bs; 803 PetscMPIInt rank = baij->rank; 804 PetscBool iascii,isdraw; 805 PetscViewer sviewer; 806 PetscViewerFormat format; 807 808 PetscFunctionBegin; 809 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 810 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 811 if (iascii) { 812 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 813 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 814 MatInfo info; 815 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 816 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 817 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 818 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 819 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 820 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 821 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 822 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 823 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 824 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 825 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 826 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 827 PetscFunctionReturn(0); 828 } else if (format == PETSC_VIEWER_ASCII_INFO) { 829 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 830 PetscFunctionReturn(0); 831 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 832 PetscFunctionReturn(0); 833 } 834 } 835 836 if (isdraw) { 837 PetscDraw draw; 838 PetscBool isnull; 839 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 840 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 841 } 842 843 
{ 844 /* assemble the entire matrix onto first processor. */ 845 Mat A; 846 Mat_SeqSBAIJ *Aloc; 847 Mat_SeqBAIJ *Bloc; 848 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 849 MatScalar *a; 850 const char *matname; 851 852 /* Should this be the same type as mat? */ 853 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 854 if (!rank) { 855 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 856 } else { 857 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 858 } 859 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 860 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 861 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 862 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 863 864 /* copy over the A part */ 865 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 866 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 867 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 868 869 for (i=0; i<mbs; i++) { 870 rvals[0] = bs*(baij->rstartbs + i); 871 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 872 for (j=ai[i]; j<ai[i+1]; j++) { 873 col = (baij->cstartbs+aj[j])*bs; 874 for (k=0; k<bs; k++) { 875 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 876 col++; 877 a += bs; 878 } 879 } 880 } 881 /* copy over the B part */ 882 Bloc = (Mat_SeqBAIJ*)baij->B->data; 883 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 884 for (i=0; i<mbs; i++) { 885 886 rvals[0] = bs*(baij->rstartbs + i); 887 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 888 for (j=ai[i]; j<ai[i+1]; j++) { 889 col = baij->garray[aj[j]]*bs; 890 for (k=0; k<bs; k++) { 891 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 892 col++; 893 a += bs; 894 } 895 } 896 } 897 ierr = PetscFree(rvals);CHKERRQ(ierr); 898 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 899 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 900 /* 901 Everyone has to call to 
draw the matrix since the graphics waits are 902 synchronized across all processors that share the PetscDraw object 903 */ 904 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 905 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 906 if (!rank) { 907 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 908 ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 909 } 910 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 911 ierr = MatDestroy(&A);CHKERRQ(ierr); 912 } 913 PetscFunctionReturn(0); 914 } 915 916 #undef __FUNCT__ 917 #define __FUNCT__ "MatView_MPISBAIJ" 918 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 919 { 920 PetscErrorCode ierr; 921 PetscBool iascii,isdraw,issocket,isbinary; 922 923 PetscFunctionBegin; 924 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 925 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 926 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 927 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 928 if (iascii || isdraw || issocket || isbinary) { 929 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 930 } 931 PetscFunctionReturn(0); 932 } 933 934 #undef __FUNCT__ 935 #define __FUNCT__ "MatDestroy_MPISBAIJ" 936 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 937 { 938 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 939 PetscErrorCode ierr; 940 941 PetscFunctionBegin; 942 #if defined(PETSC_USE_LOG) 943 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 944 #endif 945 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 946 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 947 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 948 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 949 #if 
defined(PETSC_USE_CTABLE) 950 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 951 #else 952 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 953 #endif 954 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 955 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 956 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 957 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 958 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 959 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 960 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 961 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 962 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 963 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 964 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 965 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 966 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 967 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 968 ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 969 #if defined(PETSC_USE_REAL_MAT_SINGLE) 970 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 971 #endif 972 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 973 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 974 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 975 ierr = PetscFree(mat->data);CHKERRQ(ierr); 976 977 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 978 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 979 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 980 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 981 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 982 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr); 983 #if defined(PETSC_HAVE_ELEMENTAL) 984 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr); 985 
#endif 986 PetscFunctionReturn(0); 987 } 988 989 #undef __FUNCT__ 990 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian" 991 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 992 { 993 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 994 PetscErrorCode ierr; 995 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 996 PetscScalar *from; 997 const PetscScalar *x; 998 999 PetscFunctionBegin; 1000 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1001 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1002 1003 /* diagonal part */ 1004 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1005 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1006 1007 /* subdiagonal part */ 1008 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1009 1010 /* copy x into the vec slvec0 */ 1011 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1012 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1013 1014 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1015 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1016 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1017 1018 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1019 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1020 /* supperdiagonal part */ 1021 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1022 PetscFunctionReturn(0); 1023 } 1024 1025 #undef __FUNCT__ 1026 #define __FUNCT__ "MatMult_MPISBAIJ" 1027 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 1028 { 1029 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1030 PetscErrorCode ierr; 1031 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 1032 PetscScalar *from; 1033 const PetscScalar *x; 1034 1035 PetscFunctionBegin; 1036 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1037 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1038 1039 /* 
diagonal part */ 1040 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1041 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1042 1043 /* subdiagonal part */ 1044 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1045 1046 /* copy x into the vec slvec0 */ 1047 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1048 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1049 1050 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1051 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1052 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1053 1054 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1055 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1056 /* supperdiagonal part */ 1057 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1058 PetscFunctionReturn(0); 1059 } 1060 1061 #undef __FUNCT__ 1062 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 1063 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 1064 { 1065 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1066 PetscErrorCode ierr; 1067 PetscInt nt; 1068 1069 PetscFunctionBegin; 1070 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1071 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1072 1073 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1074 if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1075 1076 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1077 /* do diagonal part */ 1078 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1079 /* do supperdiagonal part */ 1080 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1081 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1082 /* do subdiagonal part */ 1083 ierr = 
(*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1084 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1085 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1086 PetscFunctionReturn(0); 1087 } 1088 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 1091 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1092 { 1093 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1094 PetscErrorCode ierr; 1095 PetscInt mbs=a->mbs,bs=A->rmap->bs; 1096 PetscScalar *from,zero=0.0; 1097 const PetscScalar *x; 1098 1099 PetscFunctionBegin; 1100 /* 1101 PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n"); 1102 PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT); 1103 */ 1104 /* diagonal part */ 1105 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1106 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 1107 1108 /* subdiagonal part */ 1109 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1110 1111 /* copy x into the vec slvec0 */ 1112 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1113 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1114 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1115 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1116 1117 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1118 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1119 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1120 1121 /* supperdiagonal part */ 1122 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1123 PetscFunctionReturn(0); 1124 } 1125 1126 #undef __FUNCT__ 1127 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 1128 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1129 { 1130 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1131 PetscErrorCode ierr; 1132 
1133 PetscFunctionBegin; 1134 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1135 /* do diagonal part */ 1136 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1137 /* do supperdiagonal part */ 1138 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1139 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1140 1141 /* do subdiagonal part */ 1142 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1143 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1144 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1145 PetscFunctionReturn(0); 1146 } 1147 1148 /* 1149 This only works correctly for square matrices where the subblock A->A is the 1150 diagonal block 1151 */ 1152 #undef __FUNCT__ 1153 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 1154 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1155 { 1156 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1157 PetscErrorCode ierr; 1158 1159 PetscFunctionBegin; 1160 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1161 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1162 PetscFunctionReturn(0); 1163 } 1164 1165 #undef __FUNCT__ 1166 #define __FUNCT__ "MatScale_MPISBAIJ" 1167 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1168 { 1169 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1170 PetscErrorCode ierr; 1171 1172 PetscFunctionBegin; 1173 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1174 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1175 PetscFunctionReturn(0); 1176 } 1177 1178 #undef __FUNCT__ 1179 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1180 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1181 { 1182 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1183 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1184 PetscErrorCode ierr; 1185 
PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1186 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1187 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1188 1189 PetscFunctionBegin; 1190 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1191 mat->getrowactive = PETSC_TRUE; 1192 1193 if (!mat->rowvalues && (idx || v)) { 1194 /* 1195 allocate enough space to hold information from the longest row. 1196 */ 1197 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1198 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1199 PetscInt max = 1,mbs = mat->mbs,tmp; 1200 for (i=0; i<mbs; i++) { 1201 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1202 if (max < tmp) max = tmp; 1203 } 1204 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1205 } 1206 1207 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1208 lrow = row - brstart; /* local row index */ 1209 1210 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1211 if (!v) {pvA = 0; pvB = 0;} 1212 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1213 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1214 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1215 nztot = nzA + nzB; 1216 1217 cmap = mat->garray; 1218 if (v || idx) { 1219 if (nztot) { 1220 /* Sort by increasing column numbers, assuming A and B already sorted */ 1221 PetscInt imark = -1; 1222 if (v) { 1223 *v = v_p = mat->rowvalues; 1224 for (i=0; i<nzB; i++) { 1225 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1226 else break; 1227 } 1228 imark = i; 1229 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1230 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1231 } 1232 if (idx) { 1233 *idx = idx_p = mat->rowindices; 1234 if (imark > -1) { 1235 for (i=0; i<imark; i++) { 1236 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1237 } 
1238 } else { 1239 for (i=0; i<nzB; i++) { 1240 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1241 else break; 1242 } 1243 imark = i; 1244 } 1245 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1246 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1247 } 1248 } else { 1249 if (idx) *idx = 0; 1250 if (v) *v = 0; 1251 } 1252 } 1253 *nz = nztot; 1254 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1255 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1256 PetscFunctionReturn(0); 1257 } 1258 1259 #undef __FUNCT__ 1260 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1261 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1262 { 1263 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1264 1265 PetscFunctionBegin; 1266 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1267 baij->getrowactive = PETSC_FALSE; 1268 PetscFunctionReturn(0); 1269 } 1270 1271 #undef __FUNCT__ 1272 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1273 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1274 { 1275 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1276 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1277 1278 PetscFunctionBegin; 1279 aA->getrow_utriangular = PETSC_TRUE; 1280 PetscFunctionReturn(0); 1281 } 1282 #undef __FUNCT__ 1283 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1284 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1285 { 1286 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1287 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1288 1289 PetscFunctionBegin; 1290 aA->getrow_utriangular = PETSC_FALSE; 1291 PetscFunctionReturn(0); 1292 } 1293 1294 #undef __FUNCT__ 1295 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1296 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1297 { 1298 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1299 
PetscErrorCode ierr; 1300 1301 PetscFunctionBegin; 1302 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1303 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1309 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1310 { 1311 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1312 PetscErrorCode ierr; 1313 1314 PetscFunctionBegin; 1315 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1316 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1317 PetscFunctionReturn(0); 1318 } 1319 1320 /* Check if isrow is a subset of iscol_local, called by MatGetSubMatrix_MPISBAIJ() 1321 Input: isrow - distributed(parallel), 1322 iscol_local - locally owned (seq) 1323 */ 1324 #undef __FUNCT__ 1325 #define __FUNCT__ "ISEqual_private" 1326 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg) 1327 { 1328 PetscErrorCode ierr; 1329 PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch; 1330 const PetscInt *ptr1,*ptr2; 1331 1332 PetscFunctionBegin; 1333 ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr); 1334 ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr); 1335 if (sz1 > sz2) { 1336 *flg = PETSC_FALSE; 1337 PetscFunctionReturn(0); 1338 } 1339 1340 ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr); 1341 ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1342 1343 ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr); 1344 ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr); 1345 ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr); 1346 ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr); 1347 ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr); 1348 ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr); 1349 1350 nmatch=0; 1351 k = 0; 1352 for (i=0; i<sz1; i++){ 1353 for (j=k; j<sz2; j++){ 1354 if (a1[i] == a2[j]) { 1355 k = j; nmatch++; 1356 break; 1357 } 1358 } 1359 } 1360 ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr); 1361 ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1362 ierr = PetscFree(a1);CHKERRQ(ierr); 
1363 ierr = PetscFree(a2);CHKERRQ(ierr); 1364 if (nmatch < sz1) { 1365 *flg = PETSC_FALSE; 1366 } else { 1367 *flg = PETSC_TRUE; 1368 } 1369 PetscFunctionReturn(0); 1370 } 1371 1372 #undef __FUNCT__ 1373 #define __FUNCT__ "MatGetSubMatrix_MPISBAIJ" 1374 PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1375 { 1376 PetscErrorCode ierr; 1377 IS iscol_local; 1378 PetscInt csize; 1379 PetscBool isequal; 1380 1381 PetscFunctionBegin; 1382 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1383 if (call == MAT_REUSE_MATRIX) { 1384 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1385 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1386 } else { 1387 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1388 ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr); 1389 if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow"); 1390 } 1391 1392 /* now call MatGetSubMatrix_MPIBAIJ() */ 1393 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1394 if (call == MAT_INITIAL_MATRIX) { 1395 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1396 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1397 } 1398 PetscFunctionReturn(0); 1399 } 1400 1401 #undef __FUNCT__ 1402 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1403 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1404 { 1405 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1406 PetscErrorCode ierr; 1407 1408 PetscFunctionBegin; 1409 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1410 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1411 PetscFunctionReturn(0); 1412 } 1413 1414 #undef __FUNCT__ 1415 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1416 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1417 { 1418 
Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1419 Mat A = a->A,B = a->B; 1420 PetscErrorCode ierr; 1421 PetscReal isend[5],irecv[5]; 1422 1423 PetscFunctionBegin; 1424 info->block_size = (PetscReal)matin->rmap->bs; 1425 1426 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1427 1428 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1429 isend[3] = info->memory; isend[4] = info->mallocs; 1430 1431 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1432 1433 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1434 isend[3] += info->memory; isend[4] += info->mallocs; 1435 if (flag == MAT_LOCAL) { 1436 info->nz_used = isend[0]; 1437 info->nz_allocated = isend[1]; 1438 info->nz_unneeded = isend[2]; 1439 info->memory = isend[3]; 1440 info->mallocs = isend[4]; 1441 } else if (flag == MAT_GLOBAL_MAX) { 1442 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1443 1444 info->nz_used = irecv[0]; 1445 info->nz_allocated = irecv[1]; 1446 info->nz_unneeded = irecv[2]; 1447 info->memory = irecv[3]; 1448 info->mallocs = irecv[4]; 1449 } else if (flag == MAT_GLOBAL_SUM) { 1450 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1451 1452 info->nz_used = irecv[0]; 1453 info->nz_allocated = irecv[1]; 1454 info->nz_unneeded = irecv[2]; 1455 info->memory = irecv[3]; 1456 info->mallocs = irecv[4]; 1457 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1458 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1459 info->fill_ratio_needed = 0; 1460 info->factor_mallocs = 0; 1461 PetscFunctionReturn(0); 1462 } 1463 1464 #undef __FUNCT__ 1465 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1466 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1467 { 1468 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1469 Mat_SeqSBAIJ *aA = 
(Mat_SeqSBAIJ*)a->A->data; 1470 PetscErrorCode ierr; 1471 1472 PetscFunctionBegin; 1473 switch (op) { 1474 case MAT_NEW_NONZERO_LOCATIONS: 1475 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1476 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1477 case MAT_KEEP_NONZERO_PATTERN: 1478 case MAT_NEW_NONZERO_LOCATION_ERR: 1479 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1480 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1481 break; 1482 case MAT_ROW_ORIENTED: 1483 a->roworiented = flg; 1484 1485 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1486 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1487 break; 1488 case MAT_NEW_DIAGONALS: 1489 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1490 break; 1491 case MAT_IGNORE_OFF_PROC_ENTRIES: 1492 a->donotstash = flg; 1493 break; 1494 case MAT_USE_HASH_TABLE: 1495 a->ht_flag = flg; 1496 break; 1497 case MAT_HERMITIAN: 1498 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1499 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1500 1501 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1502 break; 1503 case MAT_SPD: 1504 A->spd_set = PETSC_TRUE; 1505 A->spd = flg; 1506 if (flg) { 1507 A->symmetric = PETSC_TRUE; 1508 A->structurally_symmetric = PETSC_TRUE; 1509 A->symmetric_set = PETSC_TRUE; 1510 A->structurally_symmetric_set = PETSC_TRUE; 1511 } 1512 break; 1513 case MAT_SYMMETRIC: 1514 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1515 break; 1516 case MAT_STRUCTURALLY_SYMMETRIC: 1517 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1518 break; 1519 case MAT_SYMMETRY_ETERNAL: 1520 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1521 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1522 break; 1523 case MAT_IGNORE_LOWER_TRIANGULAR: 1524 aA->ignore_ltriangular = flg; 1525 break; 1526 case MAT_ERROR_LOWER_TRIANGULAR: 1527 aA->ignore_ltriangular = flg; 1528 break; 1529 case MAT_GETROW_UPPERTRIANGULAR: 1530 
aA->getrow_utriangular = flg; 1531 break; 1532 default: 1533 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1534 } 1535 PetscFunctionReturn(0); 1536 } 1537 1538 #undef __FUNCT__ 1539 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1540 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1541 { 1542 PetscErrorCode ierr; 1543 1544 PetscFunctionBegin; 1545 if (MAT_INITIAL_MATRIX || *B != A) { 1546 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1547 } 1548 PetscFunctionReturn(0); 1549 } 1550 1551 #undef __FUNCT__ 1552 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1553 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1554 { 1555 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1556 Mat a = baij->A, b=baij->B; 1557 PetscErrorCode ierr; 1558 PetscInt nv,m,n; 1559 PetscBool flg; 1560 1561 PetscFunctionBegin; 1562 if (ll != rr) { 1563 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1564 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1565 } 1566 if (!ll) PetscFunctionReturn(0); 1567 1568 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1569 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1570 1571 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1572 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1573 1574 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1575 1576 /* left diagonalscale the off-diagonal part */ 1577 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1578 1579 /* scale the diagonal part */ 1580 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1581 1582 /* right diagonalscale the off-diagonal part */ 1583 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1584 ierr = 
(*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1585 PetscFunctionReturn(0); 1586 } 1587 1588 #undef __FUNCT__ 1589 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1590 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1591 { 1592 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1593 PetscErrorCode ierr; 1594 1595 PetscFunctionBegin; 1596 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1597 PetscFunctionReturn(0); 1598 } 1599 1600 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1601 1602 #undef __FUNCT__ 1603 #define __FUNCT__ "MatEqual_MPISBAIJ" 1604 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1605 { 1606 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1607 Mat a,b,c,d; 1608 PetscBool flg; 1609 PetscErrorCode ierr; 1610 1611 PetscFunctionBegin; 1612 a = matA->A; b = matA->B; 1613 c = matB->A; d = matB->B; 1614 1615 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1616 if (flg) { 1617 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1618 } 1619 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1620 PetscFunctionReturn(0); 1621 } 1622 1623 #undef __FUNCT__ 1624 #define __FUNCT__ "MatCopy_MPISBAIJ" 1625 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1626 { 1627 PetscErrorCode ierr; 1628 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1629 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1630 1631 PetscFunctionBegin; 1632 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1633 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1634 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1635 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1636 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1637 } else { 1638 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1639 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1640 } 1641 PetscFunctionReturn(0); 1642 } 1643 1644 #undef __FUNCT__ 1645 #define __FUNCT__ "MatSetUp_MPISBAIJ" 1646 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1647 { 1648 PetscErrorCode ierr; 1649 1650 PetscFunctionBegin; 1651 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1652 PetscFunctionReturn(0); 1653 } 1654 1655 #undef __FUNCT__ 1656 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1657 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1658 { 1659 PetscErrorCode ierr; 1660 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1661 PetscBLASInt bnz,one=1; 1662 Mat_SeqSBAIJ *xa,*ya; 1663 Mat_SeqBAIJ *xb,*yb; 1664 1665 PetscFunctionBegin; 1666 if (str == SAME_NONZERO_PATTERN) { 1667 PetscScalar alpha = a; 1668 xa = (Mat_SeqSBAIJ*)xx->A->data; 1669 ya = (Mat_SeqSBAIJ*)yy->A->data; 1670 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1671 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1672 xb = (Mat_SeqBAIJ*)xx->B->data; 1673 yb = (Mat_SeqBAIJ*)yy->B->data; 1674 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1675 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1676 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1677 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1678 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1679 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1680 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 1681 } else { 1682 Mat B; 1683 PetscInt 
*nnz_d,*nnz_o,bs=Y->rmap->bs;
  /* NOTE(review): tail of MatAXPY_MPISBAIJ (its head lies before this chunk).
     This branch builds a new matrix B with the merged nonzero pattern of Y and a*X,
     computes the sum there, then replaces Y's guts with B's via MatHeaderReplace(). */
  if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
  ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
  ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
  /* per-row block-nonzero counts for the diagonal (A) and off-diagonal (B) local parts */
  ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
  ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
  ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
  ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
  ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
  ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
  ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
  ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
  ierr = PetscFree(nnz_d);CHKERRQ(ierr);
  ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
  ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
/*
   Extract a set of parallel submatrices.  Both row and column index sets must be
   sorted; the actual extraction is delegated to the nonsymmetric BAIJ code, after
   which any submatrix whose row and column index sets differ has its symmetry
   flag cleared (it is no longer a symmetric extraction).
*/
PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
{
  PetscErrorCode ierr;
  PetscInt       i;
  PetscBool      flg,sorted;

  PetscFunctionBegin;
  /* validate: every index set must be sorted for the BAIJ extraction code */
  for (i = 0; i < n; i++) {
    ierr = ISSorted(irow[i],&sorted);CHKERRQ(ierr);
    if (!sorted) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row index set %d not sorted",i);
    ierr = ISSorted(icol[i],&sorted);CHKERRQ(ierr);
    if (!sorted) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Column index set %d not sorted",i);
  }
  ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
    if (!flg) { /* *B[i] is non-symmetric, set flag */
      ierr = MatSetOption(*B[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatShift_MPISBAIJ"
/*
   Y = Y + a*I.  Ensures the diagonal block is preallocated (one block nonzero
   per block row) before delegating to the generic shift implementation.
*/
PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *maij = (Mat_MPISBAIJ*)Y->data;
  Mat_SeqSBAIJ   *aij  = (Mat_SeqSBAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    /* preallocated, but the local diagonal block holds no nonzeros yet */
    ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Function table for the MPISBAIJ type; slot numbers follow struct _MatOps.
   MatMult/MatMultAdd double as the transpose versions since the matrix is symmetric. */
static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
                                       MatGetRow_MPISBAIJ,
                                       MatRestoreRow_MPISBAIJ,
                                       MatMult_MPISBAIJ,
                               /* 4*/  MatMultAdd_MPISBAIJ,
                                       MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
                                       MatMultAdd_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                               /* 10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPISBAIJ,
                                       MatTranspose_MPISBAIJ,
                               /* 15*/ MatGetInfo_MPISBAIJ,
                                       MatEqual_MPISBAIJ,
                                       MatGetDiagonal_MPISBAIJ,
                                       MatDiagonalScale_MPISBAIJ,
                                       MatNorm_MPISBAIJ,
                               /* 20*/ MatAssemblyBegin_MPISBAIJ,
                                       MatAssemblyEnd_MPISBAIJ,
                                       MatSetOption_MPISBAIJ,
                                       MatZeroEntries_MPISBAIJ,
                               /* 24*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 29*/ MatSetUp_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 34*/ MatDuplicate_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 39*/ MatAXPY_MPISBAIJ,
                                       MatGetSubMatrices_MPISBAIJ,
                                       MatIncreaseOverlap_MPISBAIJ,
                                       MatGetValues_MPISBAIJ,
                                       MatCopy_MPISBAIJ,
                               /* 44*/ 0,
                                       MatScale_MPISBAIJ,
                                       MatShift_MPISBAIJ,
                                       0,
                                       0,
                               /* 49*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 54*/ 0,
                                       0,
                                       MatSetUnfactored_MPISBAIJ,
                                       0,
                                       MatSetValuesBlocked_MPISBAIJ,
                               /* 59*/ MatGetSubMatrix_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 64*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 74*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 79*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatLoad_MPISBAIJ,
                               /* 84*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 89*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 94*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 99*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*104*/ 0,
                                       MatRealPart_MPISBAIJ,
                                       MatImaginaryPart_MPISBAIJ,
                                       MatGetRowUpperTriangular_MPISBAIJ,
                                       MatRestoreRowUpperTriangular_MPISBAIJ,
                               /*109*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*114*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*119*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*124*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*129*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*134*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*139*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
};

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
/* Return (no copy, no reference bump) the sequential matrix holding this
   process's diagonal block. */
PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPISBAIJ*)A->data)->A;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
/* Type-specific implementation of MatMPISBAIJSetPreallocation(): sets up the
   layouts and the two local blocks (SeqSBAIJ diagonal, SeqBAIJ off-diagonal). */
PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscInt       i,mbs,Mbs;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
  ierr =
PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1918 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1919 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1920 1921 b = (Mat_MPISBAIJ*)B->data; 1922 mbs = B->rmap->n/bs; 1923 Mbs = B->rmap->N/bs; 1924 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1925 1926 B->rmap->bs = bs; 1927 b->bs2 = bs*bs; 1928 b->mbs = mbs; 1929 b->Mbs = Mbs; 1930 b->nbs = B->cmap->n/bs; 1931 b->Nbs = B->cmap->N/bs; 1932 1933 for (i=0; i<=b->size; i++) { 1934 b->rangebs[i] = B->rmap->range[i]/bs; 1935 } 1936 b->rstartbs = B->rmap->rstart/bs; 1937 b->rendbs = B->rmap->rend/bs; 1938 1939 b->cstartbs = B->cmap->rstart/bs; 1940 b->cendbs = B->cmap->rend/bs; 1941 1942 if (!B->preallocated) { 1943 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1944 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1945 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1946 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 1947 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1948 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1949 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1950 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 1951 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 1952 } 1953 1954 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1955 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1956 1957 B->preallocated = PETSC_TRUE; 1958 PetscFunctionReturn(0); 1959 } 1960 1961 #undef __FUNCT__ 1962 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ" 1963 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1964 { 1965 PetscInt m,rstart,cstart,cend; 
  PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
  const PetscInt *JJ    =0;
  PetscScalar    *values=0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
  ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
  /* everything below works in block (not point) indices */
  m      = B->rmap->n/bs;
  rstart = B->rmap->rstart/bs;
  cstart = B->cmap->rstart/bs;
  cend   = B->cmap->rend/bs;

  if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    nz_max = PetscMax(nz_max,nz);
    JJ     = jj + ii[i];
    /* skip columns left of the diagonal block (they belong to the off-diagonal part);
       NOTE(review): assumes each row's column indices are sorted — TODO confirm */
    for (j=0; j<nz; j++) {
      if (*JJ >= cstart) break;
      JJ++;
    }
    /* count columns falling inside [cstart,cend): the diagonal-block entries */
    d = 0;
    for (; j<nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d;
  }
  ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  values = (PetscScalar*)V;
  if (!values) {
    /* no values supplied: insert zeros using a scratch buffer big enough for the widest row */
    ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row    = i + rstart;
    PetscInt          ncols  = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
    ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
   based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
   the matrix is stored.

   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
   can call MatSetOption(Mat, MAT_HERMITIAN);

   Options Database Keys:
. -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPISBAIJ
M*/

PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);

#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPISBAIJ"
/*
   Type constructor for MATMPISBAIJ: installs the function table, creates the
   stash, zeroes the per-type data structure, and registers the composed
   methods (preallocation, store/retrieve values, conversions).
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->assembled    = PETSC_FALSE;
  B->insertmode   = NOT_SET_VALUES;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec    = 0;
  b->Mvctx   = 0;
  b->slvec0  = 0;
  b->slvec0b = 0;
  b->slvec1  = 0;
  b->slvec1a = 0;
  b->slvec1b = 0;
  b->sMvctx  = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  b->in_loc = 0;
  b->v_loc  = 0;
  b->n_loc  = 0;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
#endif

  /* SBAIJ matrices are symmetric by construction */
  B->symmetric                  = PETSC_TRUE;
  B->structurally_symmetric     = PETSC_TRUE;
  B->symmetric_set              = PETSC_TRUE;
  B->structurally_symmetric_set = PETSC_TRUE;

  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.

   This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
   and MATMPISBAIJ otherwise.

   Options Database Keys:
. -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
M*/

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation"
/*@C
   MatMPISBAIJSetPreallocation - For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).
By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  B     - the matrix
.  bs    - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local submatrix
           (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
           for the diagonal entry and set a value even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix that is right of the diagonal
           (possibly different for each block row) or NULL.


   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |. . . d d d o o o o  o  o
   row 4  |. . . d d d o o o o  o  o
   row 5  |. . . d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d matrix is stored in
   MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix

   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
@*/
PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* dispatch to the type-specific implementation if the type provides one */
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateSBAIJ"
/*@C
   MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks.
The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the diagonal portion of the local submatrix
           (possibly different for each block row) or NULL.
           If you plan to factor the matrix you must leave room for the diagonal entry and
           set its value even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
.   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqBAIJ format on one processor)

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   The number of rows and columns must be divisible by blocksize.
   This matrix type does not support complex Hermitian operation.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |. . . d d d o o o o  o  o
   row 4  |. . . d d d o o o o  o  o
   row 5  |. . . d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d matrix is stored in
   MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
@*/

PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    /* single-process communicator: fall back to the sequential format */
    ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
    ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPISBAIJ"
/* Duplicate an MPISBAIJ matrix: copies layouts, sizes, ownership tables and
   (depending on cpvalues) the numerical values of the A and B blocks. */
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
  PetscScalar    *array;

  PetscFunctionBegin;
  *newmat = 0;

  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy scalar fields of the per-type data; per-instance caches start empty */
  a      = (Mat_MPISBAIJ*)mat->data;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);

  /* slvec1a/slvec1b (and slvec0b) are views into slvec1 (slvec0): the first bs*mbs
     entries are the "local" part, the remainder the "ghost" part */
  ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
  /* NOTE(review): slvec0 and slvec1 were already logged as parents above; these
     two extra PetscLogObjectParent calls look redundant but are kept as-is */
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);

  /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
  a->sMvctx = oldmat->sMvctx;
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPISBAIJ"
/* Load an MPISBAIJ matrix from a PETSc binary viewer; rank 0 reads the file
   and ships each process its rows (continues past this chunk). */
PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
  PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
  PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
  PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp;
  int            fd;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr =
PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2511 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2512 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2513 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2514 if (bs < 0) bs = 1; 2515 2516 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2517 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2518 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2519 if (!rank) { 2520 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2521 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2522 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2523 } 2524 2525 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2526 M = header[1]; 2527 N = header[2]; 2528 2529 /* If global sizes are set, check if they are consistent with that given in the file */ 2530 if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M); 2531 if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N); 2532 2533 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2534 2535 /* 2536 This code adds extra rows to make sure the number of rows is 2537 divisible by the blocksize 2538 */ 2539 Mbs = M/bs; 2540 extra_rows = bs - M + bs*(Mbs); 2541 if (extra_rows == bs) extra_rows = 0; 2542 else Mbs++; 2543 if (extra_rows &&!rank) { 2544 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2545 } 2546 2547 /* determine ownership of all 
rows */ 2548 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2549 mbs = Mbs/size + ((Mbs % size) > rank); 2550 m = mbs*bs; 2551 } else { /* User Set */ 2552 m = newmat->rmap->n; 2553 mbs = m/bs; 2554 } 2555 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 2556 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2557 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2558 rowners[0] = 0; 2559 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2560 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2561 rstart = rowners[rank]; 2562 rend = rowners[rank+1]; 2563 2564 /* distribute row lengths to all processors */ 2565 ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 2566 if (!rank) { 2567 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 2568 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2569 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2570 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 2571 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2572 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2573 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2574 } else { 2575 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2576 } 2577 2578 if (!rank) { /* procs[0] */ 2579 /* calculate the number of nonzeros on each processor */ 2580 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 2581 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2582 for (i=0; i<size; i++) { 2583 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2584 procsnz[i] += rowlengths[j]; 2585 } 2586 } 2587 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2588 2589 /* determine max buffer needed and allocate it */ 2590 maxnz = 0; 2591 for (i=0; i<size; i++) { 2592 maxnz = PetscMax(maxnz,procsnz[i]); 2593 } 2594 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2595 2596 /* read in 
my part of the matrix column indices */ 2597 nz = procsnz[0]; 2598 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2599 mycols = ibuf; 2600 if (size == 1) nz -= extra_rows; 2601 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2602 if (size == 1) { 2603 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 2604 } 2605 2606 /* read in every ones (except the last) and ship off */ 2607 for (i=1; i<size-1; i++) { 2608 nz = procsnz[i]; 2609 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2610 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2611 } 2612 /* read in the stuff for the last proc */ 2613 if (size != 1) { 2614 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2615 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2616 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2617 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2618 } 2619 ierr = PetscFree(cols);CHKERRQ(ierr); 2620 } else { /* procs[i], i>0 */ 2621 /* determine buffer space needed for message */ 2622 nz = 0; 2623 for (i=0; i<m; i++) nz += locrowlens[i]; 2624 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2625 mycols = ibuf; 2626 /* receive message of column indices*/ 2627 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2628 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2629 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2630 } 2631 2632 /* loop over local rows, determining number of off diagonal entries */ 2633 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 2634 ierr = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 2635 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2636 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2637 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2638 rowcount = 0; 2639 nzcount = 0; 2640 for (i=0; i<mbs; i++) { 
2641 dcount = 0; 2642 odcount = 0; 2643 for (j=0; j<bs; j++) { 2644 kmax = locrowlens[rowcount]; 2645 for (k=0; k<kmax; k++) { 2646 tmp = mycols[nzcount++]/bs; /* block col. index */ 2647 if (!mask[tmp]) { 2648 mask[tmp] = 1; 2649 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2650 else masked1[dcount++] = tmp; /* entry in diag portion */ 2651 } 2652 } 2653 rowcount++; 2654 } 2655 2656 dlens[i] = dcount; /* d_nzz[i] */ 2657 odlens[i] = odcount; /* o_nzz[i] */ 2658 2659 /* zero out the mask elements we set */ 2660 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2661 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2662 } 2663 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2664 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2665 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2666 2667 if (!rank) { 2668 ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 2669 /* read in my part of the matrix numerical values */ 2670 nz = procsnz[0]; 2671 vals = buf; 2672 mycols = ibuf; 2673 if (size == 1) nz -= extra_rows; 2674 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2675 if (size == 1) { 2676 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 2677 } 2678 2679 /* insert into matrix */ 2680 jj = rstart*bs; 2681 for (i=0; i<m; i++) { 2682 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2683 mycols += locrowlens[i]; 2684 vals += locrowlens[i]; 2685 jj++; 2686 } 2687 2688 /* read in other processors (except the last one) and ship out */ 2689 for (i=1; i<size-1; i++) { 2690 nz = procsnz[i]; 2691 vals = buf; 2692 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2693 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2694 } 2695 /* the last proc */ 2696 if (size != 1) { 2697 nz = procsnz[i] - extra_rows; 2698 vals = buf; 2699 ierr = 
PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2700 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2701 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2702 } 2703 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2704 2705 } else { 2706 /* receive numeric values */ 2707 ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 2708 2709 /* receive message of values*/ 2710 vals = buf; 2711 mycols = ibuf; 2712 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2713 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2714 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2715 2716 /* insert into matrix */ 2717 jj = rstart*bs; 2718 for (i=0; i<m; i++) { 2719 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2720 mycols += locrowlens[i]; 2721 vals += locrowlens[i]; 2722 jj++; 2723 } 2724 } 2725 2726 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2727 ierr = PetscFree(buf);CHKERRQ(ierr); 2728 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2729 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2730 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2731 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2732 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2733 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2734 PetscFunctionReturn(0); 2735 } 2736 2737 #undef __FUNCT__ 2738 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2739 /*XXXXX@ 2740 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2741 2742 Input Parameters: 2743 . mat - the matrix 2744 . 
fact - factor

   Not Collective on Mat, each process can have a different hash factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@XXXXX*/


#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
/*
   MatGetRowMaxAbs_MPISBAIJ - computes v[i] = max_j |A[i,j]| for each local row i.

   Because only the upper triangle is stored in SBAIJ format, an entry of the
   off-diagonal block B contributes to the max of BOTH its row and its column.
   The column contributions belong to rows owned by higher-ranked processes, so
   each rank accumulates them in `work` and ships the relevant slices to those
   owners, which fold them into their own `va`.

   idx (matched column indices) is not supported and is rejected up front.
*/
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
  /* start from the row maxima of the local diagonal block A */
  ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  bs  = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;   /* raw block values of the off-diagonal part B */
  bi  = b->i;   /* block-row pointers of B */
  bj  = b->j;   /* block-column indices of B */

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed; work[col] collects the
     column-side contribution of B entries for every global point row */
  ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);

  /* row_max for B: the last rank owns no B columns to forward, so it skips this */
  if (rank != size-1) {
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col  = bcol + kcol;        /* local col index */
          col += rowners_bs[rank+1]; /* global col index; NOTE(review): assumes B's
                                        compressed columns start right after this
                                        rank's diagonal block -- confirm against
                                        the sMvctx layout used elsewhere */
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;    /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to its owners */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1]-rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    }
  }

  /* receive values: rank r gets one message from each of ranks 0..r-1 and
     max-reduces them into its owned slice of v */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank+1]-rowners_bs[rank];
    for (source=0; source<rank; source++) {
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
      /* process values */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ"
/*
   MatSOR_MPISBAIJ - SOR/SSOR relaxation for MPISBAIJ matrices (bs == 1 only).

   Supported modes:
     SOR_APPLY_UPPER                  - delegated to the local diagonal block's sor
     SOR_LOCAL_SYMMETRIC_SWEEP        - local symmetric sweeps with the off-process
                                        coupling folded into a modified rhs bb1 via
                                        the single slvec0/slvec1 scatter
     SOR_LOCAL_FORWARD/BACKWARD_SWEEP - only for its==1 with zero initial guess
     SOR_EISENSTAT                    - Eisenstat's trick
   Anything else raises PETSC_ERR_SUP.
*/
PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
  PetscScalar       *x,*ptr,*from;
  Vec               bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep needs no off-process data when starting from zero */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {

      /* lower triangular part: slvec0b = - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      /* negate so the scatter ADDs -x / -B^T x contributions */
      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);

      /* overlap the scatter with restoring the arrays */
      ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if (flag & SOR_EISENSTAT) {
    Vec               xx1;
    PetscBool         hasop;
    const PetscScalar *diag;
    PetscScalar       *sl,scale = (omega - 2.0)/omega;
    PetscInt          i,n;

    /* lazily create work vectors; they persist in the Mat for reuse */
    if (!mat->xx1) {
      ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
      ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);

    if (hasop) {
      /* NOTE(review): MatMultDiagonalBlock writes into bb1 but the VecAYPX below
         scales mat->slvec1a -- the two branches disagree on the destination
         vector; verify this path against the else-branch, which writes slvec1a */
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
    } else {
      /*
        These two lines are replaced by code that may be a bit faster for a good compiler
        ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
        ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
      */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
      if (omega == 1.0) {
        /* scale == -1 when omega == 1, so sl = b - diag.*x */
        for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply off-diagonal portion of matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
/*
   MatSOR_MPISBAIJ_2comm - variant of MatSOR_MPISBAIJ using the two-scatter
   (Mvctx/lvec) communication scheme instead of the single slvec0/slvec1
   scatter. Only SOR_LOCAL_SYMMETRIC_SWEEP with bs == 1 is supported.
*/
PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep needs no off-process data when starting from zero */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      /* forward-scatter xx into the ghosted lvec, overlapped with local work */
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower diagonal part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      /* reverse-scatter adds the -B^T*xx pieces into their owners' bb1 */
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper diagonal part: bb1 = bb1 - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJWithArrays"
/*@
     MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
         CSR format the local rows.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;


  PetscFunctionBegin;
  /* validate the CSR arrays before creating anything */
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
  /* copies i/j/a into the matrix; the caller's arrays are not referenced afterwards */
  ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
/*@C
   MatMPISBAIJSetPreallocationCSR -
Allocates memory for a sparse parallel matrix in BAIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero) these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
@*/
PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatch to the type-specific implementation registered on B (no-op if absent) */
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
/*
   MatCreateMPIMatConcatenateSeqMat_MPISBAIJ - stacks each rank's sequential
   SBAIJ matrix `inmat` row-wise into one parallel MPISBAIJ matrix `*outmat`
   on `comm`.

   scall == MAT_INITIAL_MATRIX runs a symbolic phase that counts per-block-row
   diagonal/off-diagonal nonzeros (via MatPreallocateInitialize/Set/Finalize)
   and creates *outmat; the numeric phase then copies the values row by row.
   n is the local column (block) count, or PETSC_DECIDE to split ownership.
*/
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data;
    /* NOTE(review): bs,cbs here shadow the function-scope bs,cbs declared above;
       the outer pair is re-fetched in the numeric phase, so behavior is intact,
       but the shadowing is easy to misread */
    PetscInt     *dnz,*onz,sum,bs,cbs,mbs,Nbs;
    PetscInt     *bindx,rmax=a->rmax,j;

    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    mbs = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
    }
    /* Check sum(n) = Nbs */
    ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs);

    /* exclusive prefix sum of mbs gives this rank's first global block row */
    ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= mbs;

    ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
    ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<mbs; i++) {
      ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
      nnz  = nnz/bs;
      /* compress point-column indices to block-column indices */
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
    }
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscFree(bindx);CHKERRQ(ierr);

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);

  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;   /* global row index of local row i */
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}