#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    A->nonzerostate++;\
    a_noinsert:; \
    ailen[brow] = nrow; \
  }
#define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    B->nonzerostate++;\
    b_noinsert:; \
    bilen[brow] = nrow; \
  }
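/*
   Both macros above share one pattern: locate block column bcol in the sorted row
   segment rp[0..nrow-1] with a binary search that stops once the window is small,
   finish with a linear scan, update in place on a hit, and otherwise shift the
   tail of the row up one slot to insert. A minimal standalone sketch of that
   pattern follows (hypothetical names, compiled out, with a plain double payload
   in place of a bs x bs block):
*/
#if 0
static int sorted_insert(int *rp,double *ap,int *nrow,int bcol,double value,int add)
{
  int low = 0,high = *nrow,t,i,ii;
  while (high - low > 3) {            /* binary search until the window is small */
    t = (low + high)/2;
    if (rp[t] > bcol) high = t;
    else              low  = t;
  }
  for (i=low; i<high; i++) {          /* linear scan of the remaining window */
    if (rp[i] > bcol) break;
    if (rp[i] == bcol) {              /* hit: update the existing entry */
      if (add) ap[i] += value;
      else     ap[i]  = value;
      return 0;
    }
  }
  for (ii=*nrow-1; ii>=i; ii--) {     /* miss: shift the tail up one slot */
    rp[ii+1] = rp[ii];
    ap[ii+1] = ap[ii];
  }
  rp[i] = bcol;                       /* insert at position i, keeping rp sorted */
  ap[i] = value;
  (*nrow)++;                          /* caller must guarantee capacity (rmax) */
  return 1;
}
#endif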
/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else { /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else { /* off processor entry */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
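/*
   Illustrative usage of the routine above (hypothetical helper, compiled out):
   only entries whose block column is >= the block row should be passed; lower-
   triangular input is dropped or raises an error depending on the
   MAT_IGNORE_LOWER_TRIANGULAR option.
*/
#if 0
static PetscErrorCode ExampleSetValuesSBAIJ(MPI_Comm comm,Mat *S)
{
  PetscErrorCode ierr;
  PetscInt       row = 0,cols[2];
  PetscScalar    vals[2];

  PetscFunctionBegin;
  ierr = MatCreate(comm,S);CHKERRQ(ierr);
  ierr = MatSetSizes(*S,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
  ierr = MatSetType(*S,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(*S,2,5,NULL,5,NULL);CHKERRQ(ierr); /* bs = 2 */
  cols[0] = 0; cols[1] = 2;       /* upper triangle only: cols >= row */
  vals[0] = 4.0; vals[1] = -1.0;
  ierr = MatSetValues(*S,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*S,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*S,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif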
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ_Inlined"
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp    = aj + ai[row];
  ap    = aa + bs2*ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;

  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ = *value++;
            }
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined"
/*
    This routine is exactly duplicated in mpibaij.c
*/
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2*ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  low   = 0;
  high  = nrow;
  value = v;
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
/*
    This routine could be optimized by removing the need for the block copy below and passing stride information
    to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
*/
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     = baij->barray;
  PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
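/*
   Illustrative usage of MatSetValuesBlocked() with this format (hypothetical
   helper, compiled out, reusing the matrix S from the earlier sketch). Indices
   are in block units and v holds a full bs x bs block per (row,col) pair; only
   blocks with block column >= block row are accepted.
*/
#if 0
static PetscErrorCode ExampleSetValuesBlockedSBAIJ(Mat S)
{
  PetscErrorCode ierr;
  PetscInt       brow = 0,bcol = 1;              /* block indices, not point indices */
  PetscScalar    block[4] = {4.0,-1.0,-1.0,4.0}; /* one 2x2 block, row-oriented */

  PetscFunctionBegin;
  ierr = MatSetValuesBlocked(S,1,&brow,1,&bcol,block,INSERT_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(S,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(S,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif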
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
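/*
   Illustrative usage of the routine above (hypothetical helper, compiled out):
   MatGetValues() on this format supports only locally owned rows, so queries
   must stay inside the ownership range of an assembled matrix; positions stored
   in neither A nor B come back as 0.0.
*/
#if 0
static PetscErrorCode ExampleGetValuesSBAIJ(Mat S)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend,row;
  PetscScalar    val;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(S,&rstart,&rend);CHKERRQ(ierr);
  row  = rstart;                                            /* first locally owned row */
  ierr = MatGetValues(S,1,&row,1,&row,&val);CHKERRQ(ierr);  /* query the diagonal entry */
  PetscFunctionReturn(0);
}
#endif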
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
      ierr    = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* square of norm(A) */
      ierr    = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* square of norm(B) */
      ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
      ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
      ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
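/*
   Note on the NORM_FROBENIUS branch above: each process stores its diagonal
   block A_p and off-diagonal part B_p only once, but every entry of B_p also
   stands for its transposed copy in the full symmetric matrix (the sequential
   SBAIJ norm of A_p already accounts for the symmetry inside the diagonal
   block). Hence

       ||M||_F^2 = sum_p ( ||A_p||_F^2 + 2 ||B_p||_F^2 ),

   which is exactly sum[0] + 2*sum[1] after the MPI_Allreduce().
*/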
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); /* query the block stash for the block-stash report */
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the row-oriented flags to PETSC_FALSE, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
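/*
   The two routines above complete the standard assembly handshake: Begin ships
   the stashed off-process entries, End drains the scalar and block stashes and
   builds the communication scatters. A typical caller-side pattern (hypothetical
   helper, compiled out, reusing the 100x100 matrix S from the earlier sketch):
*/
#if 0
static PetscErrorCode ExampleAssembleTridiagonal(Mat S)
{
  PetscErrorCode ierr;
  PetscInt       i,rstart,rend;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(S,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    ierr = MatSetValue(S,i,i,2.0,INSERT_VALUES);CHKERRQ(ierr);                     /* diagonal */
    if (i+1 < 100) {ierr = MatSetValue(S,i,i+1,-1.0,INSERT_VALUES);CHKERRQ(ierr);} /* upper neighbor only */
  }
  ierr = MatAssemblyBegin(S,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* ships stashed off-process entries */
  ierr = MatAssemblyEnd(S,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);   /* drains stashes, builds the scatters */
  PetscFunctionReturn(0);
}
#endif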
extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer);
extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);

#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
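/*
   Note on the two MatMult variants above: with only the upper triangle stored,
   the local product is y = A_d*x + B*x_ghost + B^T*x. Both routines assemble
   slvec0 = [x; B^T*x] and slvec1 = [A_d*x; 0], then a single forward scatter-add
   on sMvctx simultaneously delivers the B^T*x contributions to their owning
   processes and collects the ghost values of x needed for the final B*slvec1b
   term, so one communication round suffices. The _2comm variants below trade
   this for two communication rounds (one forward, one reverse) on Mvctx.
*/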
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from,zero=0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart; /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
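/*
   Illustrative usage of the row-access routines above (hypothetical helper,
   compiled out). Because only the upper triangle is stored, callers must
   bracket row access with MatGetRowUpperTriangular()/MatRestoreRowUpperTriangular():
*/
#if 0
static PetscErrorCode ExampleGetRowSBAIJ(Mat S)
{
  PetscErrorCode    ierr;
  PetscInt          row,ncols;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  ierr = MatGetRowUpperTriangular(S);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(S,&row,NULL);CHKERRQ(ierr); /* first local row */
  ierr = MatGetRow(S,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  /* ... inspect the stored (upper-triangular) part of this row ... */
  ierr = MatRestoreRow(S,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  ierr = MatRestoreRowUpperTriangular(S);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif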
#undef __FUNCT__
#define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPISBAIJ"
PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Check if isrow is a subset of iscol_local, called by MatGetSubMatrix_MPISBAIJ()
   Input: isrow       - distributed (parallel),
          iscol_local - locally owned (seq)
*/
#undef __FUNCT__
#define __FUNCT__ "ISEqual_private"
PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg)
{
  PetscErrorCode ierr;
  PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
  const PetscInt *ptr1,*ptr2;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
  if (sz1 > sz2) {
    *flg = PETSC_FALSE;
    PetscFunctionReturn(0);
  }

  ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);

  ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
  ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
  ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
  ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);

  nmatch = 0;
  k      = 0;
  for (i=0; i<sz1; i++) {
    for (j=k; j<sz2; j++) {
      if (a1[i] == a2[j]) {
        k = j; nmatch++;
        break;
      }
    }
  }
  ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
  ierr = PetscFree(a1);CHKERRQ(ierr);
  ierr = PetscFree(a2);CHKERRQ(ierr);
  if (nmatch < sz1) {
    *flg = PETSC_FALSE;
  } else {
    *flg = PETSC_TRUE;
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPISBAIJ"
PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;
  PetscBool      isequal;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
    if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
  }

  /* now call MatGetSubMatrix_MPIBAIJ() */
  ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A  = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);

    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
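/*
   Illustrative usage of the option handler above (hypothetical helper, compiled
   out): the most common SBAIJ-specific switch is MAT_IGNORE_LOWER_TRIANGULAR,
   which makes the MatSetValues routines above silently drop lower-triangular
   input instead of raising an error (also available as the command-line option
   -mat_ignore_lower_triangular):
*/
#if 0
static PetscErrorCode ExampleIgnoreLowerTriangular(Mat S)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(S,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatSetValue(S,5,2,0.0,INSERT_VALUES);CHKERRQ(ierr); /* lower triangle: now silently dropped */
  PetscFunctionReturn(0);
}
#endif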
PetscFunctionBegin; 1612 a = matA->A; b = matA->B; 1613 c = matB->A; d = matB->B; 1614 1615 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1616 if (flg) { 1617 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1618 } 1619 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1620 PetscFunctionReturn(0); 1621 } 1622 1623 #undef __FUNCT__ 1624 #define __FUNCT__ "MatCopy_MPISBAIJ" 1625 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1626 { 1627 PetscErrorCode ierr; 1628 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1629 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1630 1631 PetscFunctionBegin; 1632 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1633 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1634 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1635 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1636 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1637 } else { 1638 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1639 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1640 } 1641 PetscFunctionReturn(0); 1642 } 1643 1644 #undef __FUNCT__ 1645 #define __FUNCT__ "MatSetUp_MPISBAIJ" 1646 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1647 { 1648 PetscErrorCode ierr; 1649 1650 PetscFunctionBegin; 1651 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1652 PetscFunctionReturn(0); 1653 } 1654 1655 #undef __FUNCT__ 1656 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1657 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1658 { 1659 PetscErrorCode ierr; 1660 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1661 PetscBLASInt bnz,one=1; 1662 Mat_SeqSBAIJ *xa,*ya; 1663 Mat_SeqBAIJ *xb,*yb; 1664 1665 PetscFunctionBegin; 1666 if (str == SAME_NONZERO_PATTERN) { 1667 PetscScalar alpha = a; 1668 xa = (Mat_SeqSBAIJ*)xx->A->data; 1669 ya = (Mat_SeqSBAIJ*)yy->A->data; 1670 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1671 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1672 xb = (Mat_SeqBAIJ*)xx->B->data; 1673 yb = (Mat_SeqBAIJ*)yy->B->data; 1674 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1675 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1676 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1677 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1678 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1679 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1680 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 1681 } else { 1682 Mat B; 1683 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1684 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1685 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1686 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 1687 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1688 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1689 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1690 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1691 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1692 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1693 ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 1694 ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1695 ierr 
= MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1696 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1697 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1698 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 1699 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1700 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1701 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1702 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1703 } 1704 PetscFunctionReturn(0); 1705 } 1706 1707 #undef __FUNCT__ 1708 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1709 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1710 { 1711 PetscErrorCode ierr; 1712 PetscInt i; 1713 PetscBool flg,sorted; 1714 1715 PetscFunctionBegin; 1716 for (i = 0; i < n; i++) { 1717 ierr = ISSorted(irow[i],&sorted);CHKERRQ(ierr); 1718 if (!sorted) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row index set %d not sorted",i); 1719 ierr = ISSorted(icol[i],&sorted);CHKERRQ(ierr); 1720 if (!sorted) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Column index set %d not sorted",i); 1721 } 1722 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1723 for (i=0; i<n; i++) { 1724 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1725 if (!flg) { /* (*B)[i] is non-symmetric, set flag */ 1726 ierr = MatSetOption((*B)[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr); 1727 } 1728 } 1729 PetscFunctionReturn(0); 1730 } 1731 1732 #undef __FUNCT__ 1733 #define __FUNCT__ "MatShift_MPISBAIJ" 1734 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a) 1735 { 1736 PetscErrorCode ierr; 1737 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data; 1738 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data; Mat_SeqBAIJ *bij = (Mat_SeqBAIJ*)maij->B->data; 1739 1740 PetscFunctionBegin; 1741 if (!aij->nz && !bij->nz) { 1742 ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 1743 } 1744 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1745 PetscFunctionReturn(0); 1746 } 1747 1748 /* -------------------------------------------------------------------*/ 1749 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1750 MatGetRow_MPISBAIJ, 1751 MatRestoreRow_MPISBAIJ, 1752 MatMult_MPISBAIJ, 1753 /* 4*/ MatMultAdd_MPISBAIJ, 1754 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1755 MatMultAdd_MPISBAIJ, 1756 0, 1757 0, 1758 0, 1759 /* 10*/ 0, 1760 0, 1761 0, 1762 MatSOR_MPISBAIJ, 1763 MatTranspose_MPISBAIJ, 1764 /* 15*/ MatGetInfo_MPISBAIJ, 1765 MatEqual_MPISBAIJ, 1766 MatGetDiagonal_MPISBAIJ, 1767 MatDiagonalScale_MPISBAIJ, 1768 MatNorm_MPISBAIJ, 1769 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1770 MatAssemblyEnd_MPISBAIJ, 1771 MatSetOption_MPISBAIJ, 1772 MatZeroEntries_MPISBAIJ, 1773 /* 24*/ 0, 1774 0, 1775 0, 1776 0, 1777 0, 1778 /* 29*/ MatSetUp_MPISBAIJ, 1779 0, 1780 0, 1781 0, 1782 0, 1783 /* 34*/ MatDuplicate_MPISBAIJ, 1784 0, 1785 0, 1786 0, 1787 0, 1788 /* 39*/ MatAXPY_MPISBAIJ, 1789 MatGetSubMatrices_MPISBAIJ, 1790 MatIncreaseOverlap_MPISBAIJ, 1791 MatGetValues_MPISBAIJ, 1792 MatCopy_MPISBAIJ, 1793 /* 44*/ 0, 1794 MatScale_MPISBAIJ, 1795 MatShift_MPISBAIJ, 1796 0, 1797 0, 1798 /* 49*/ 0, 1799 0, 1800 0, 1801 0, 1802 0, 1803 /* 54*/ 0, 1804 0, 1805 MatSetUnfactored_MPISBAIJ, 1806 0, 1807 MatSetValuesBlocked_MPISBAIJ, 1808 /* 59*/ MatGetSubMatrix_MPISBAIJ, 1809 0, 1810 0, 1811 0, 1812 0, 1813 /* 64*/ 0, 1814 0, 1815 0, 1816 0, 1817 0, 1818 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1819 0, 1820 0, 1821 0, 1822
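/* entries left 0 below are operations that are not implemented for MATMPISBAIJ */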
0, 1823 /* 74*/ 0, 1824 0, 1825 0, 1826 0, 1827 0, 1828 /* 79*/ 0, 1829 0, 1830 0, 1831 0, 1832 MatLoad_MPISBAIJ, 1833 /* 84*/ 0, 1834 0, 1835 0, 1836 0, 1837 0, 1838 /* 89*/ 0, 1839 0, 1840 0, 1841 0, 1842 0, 1843 /* 94*/ 0, 1844 0, 1845 0, 1846 0, 1847 0, 1848 /* 99*/ 0, 1849 0, 1850 0, 1851 0, 1852 0, 1853 /*104*/ 0, 1854 MatRealPart_MPISBAIJ, 1855 MatImaginaryPart_MPISBAIJ, 1856 MatGetRowUpperTriangular_MPISBAIJ, 1857 MatRestoreRowUpperTriangular_MPISBAIJ, 1858 /*109*/ 0, 1859 0, 1860 0, 1861 0, 1862 0, 1863 /*114*/ 0, 1864 0, 1865 0, 1866 0, 1867 0, 1868 /*119*/ 0, 1869 0, 1870 0, 1871 0, 1872 0, 1873 /*124*/ 0, 1874 0, 1875 0, 1876 0, 1877 0, 1878 /*129*/ 0, 1879 0, 1880 0, 1881 0, 1882 0, 1883 /*134*/ 0, 1884 0, 1885 0, 1886 0, 1887 0, 1888 /*139*/ 0, 1889 0, 1890 0, 1891 0, 1892 0, 1893 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 1894 }; 1895 1896 #undef __FUNCT__ 1897 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1898 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1899 { 1900 PetscFunctionBegin; 1901 *a = ((Mat_MPISBAIJ*)A->data)->A; 1902 PetscFunctionReturn(0); 1903 } 1904 1905 #undef __FUNCT__ 1906 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1907 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 1908 { 1909 Mat_MPISBAIJ *b; 1910 PetscErrorCode ierr; 1911 PetscInt i,mbs,Mbs; 1912 1913 PetscFunctionBegin; 1914 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 1915 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1916 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1917 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1918 1919 b = (Mat_MPISBAIJ*)B->data; 1920 mbs = B->rmap->n/bs; 1921 Mbs = B->rmap->N/bs; 1922 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->n,bs); 1923 1924 B->rmap->bs = bs; 1925 b->bs2 = bs*bs; 1926 b->mbs = mbs; 1927 b->Mbs = Mbs; 1928 b->nbs = B->cmap->n/bs; 1929 b->Nbs = B->cmap->N/bs; 1930 1931 for (i=0; i<=b->size; i++) { 1932 b->rangebs[i] = B->rmap->range[i]/bs; 1933 } 1934 b->rstartbs = B->rmap->rstart/bs; 1935 b->rendbs = B->rmap->rend/bs; 1936 1937 b->cstartbs = B->cmap->rstart/bs; 1938 b->cendbs = B->cmap->rend/bs; 1939 1940 if (!B->preallocated) { 1941 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1942 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1943 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1944 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 1945 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1946 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1947 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1948 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 1949 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 1950 } 1951 1952 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1953 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1954 1955 B->preallocated = PETSC_TRUE; 1956 PetscFunctionReturn(0); 1957 } 1958 1959 #undef __FUNCT__ 1960 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ" 1961 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1962 { 1963 PetscInt m,rstart,cstart,cend; 1964 PetscInt
i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 1965 const PetscInt *JJ =0; 1966 PetscScalar *values=0; 1967 PetscErrorCode ierr; 1968 1969 PetscFunctionBegin; 1970 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 1971 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 1972 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 1973 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1974 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1975 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1976 m = B->rmap->n/bs; 1977 rstart = B->rmap->rstart/bs; 1978 cstart = B->cmap->rstart/bs; 1979 cend = B->cmap->rend/bs; 1980 1981 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 1982 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 1983 for (i=0; i<m; i++) { 1984 nz = ii[i+1] - ii[i]; 1985 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 1986 nz_max = PetscMax(nz_max,nz); 1987 JJ = jj + ii[i]; 1988 for (j=0; j<nz; j++) { 1989 if (*JJ >= cstart) break; 1990 JJ++; 1991 } 1992 d = 0; 1993 for (; j<nz; j++) { 1994 if (*JJ++ >= cend) break; 1995 d++; 1996 } 1997 d_nnz[i] = d; 1998 o_nnz[i] = nz - d; 1999 } 2000 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2001 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2002 2003 values = (PetscScalar*)V; 2004 if (!values) { 2005 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2006 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2007 } 2008 for (i=0; i<m; i++) { 2009 PetscInt row = i + rstart; 2010 PetscInt ncols = ii[i+1] - ii[i]; 2011 const PetscInt *icols = jj + ii[i]; 2012 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2013 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2014 } 2015 2016 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2017 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2018 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2019 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2020 PetscFunctionReturn(0); 2021 } 2022 2023 /*MC 2024 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2025 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2026 the matrix is stored. 2027 2028 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2029 can call MatSetOption(Mat, MAT_HERMITIAN); 2030 2031 Options Database Keys: 2032 . 
-mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 2033 2034 Level: beginner 2035 2036 .seealso: MatCreateMPISBAIJ 2037 M*/ 2038 2039 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*); 2040 2041 #undef __FUNCT__ 2042 #define __FUNCT__ "MatCreate_MPISBAIJ" 2043 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2044 { 2045 Mat_MPISBAIJ *b; 2046 PetscErrorCode ierr; 2047 PetscBool flg = PETSC_FALSE; 2048 2049 PetscFunctionBegin; 2050 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2051 B->data = (void*)b; 2052 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2053 2054 B->ops->destroy = MatDestroy_MPISBAIJ; 2055 B->ops->view = MatView_MPISBAIJ; 2056 B->assembled = PETSC_FALSE; 2057 B->insertmode = NOT_SET_VALUES; 2058 2059 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2060 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2061 2062 /* build local table of row and column ownerships */ 2063 ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 2064 2065 /* build cache for off array entries formed */ 2066 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2067 2068 b->donotstash = PETSC_FALSE; 2069 b->colmap = NULL; 2070 b->garray = NULL; 2071 b->roworiented = PETSC_TRUE; 2072 2073 /* stuff used in block assembly */ 2074 b->barray = 0; 2075 2076 /* stuff used for matrix vector multiply */ 2077 b->lvec = 0; 2078 b->Mvctx = 0; 2079 b->slvec0 = 0; 2080 b->slvec0b = 0; 2081 b->slvec1 = 0; 2082 b->slvec1a = 0; 2083 b->slvec1b = 0; 2084 b->sMvctx = 0; 2085 2086 /* stuff for MatGetRow() */ 2087 b->rowindices = 0; 2088 b->rowvalues = 0; 2089 b->getrowactive = PETSC_FALSE; 2090 2091 /* hash table stuff */ 2092 b->ht = 0; 2093 b->hd = 0; 2094 b->ht_size = 0; 2095 b->ht_flag = PETSC_FALSE; 2096 b->ht_fact = 0; 2097 b->ht_total_ct = 0; 2098 b->ht_insert_ct = 0; 2099 2100 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 2101 b->ijonly = PETSC_FALSE; 2102 2103 b->in_loc = 0; 2104 b->v_loc = 0; 2105 b->n_loc = 0; 2106 2107 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 2108 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 2109 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 2110 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 2111 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 2112 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr); 2113 #if defined(PETSC_HAVE_ELEMENTAL) 2114 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 2115 #endif 2116 2117 B->symmetric = PETSC_TRUE; 2118 B->structurally_symmetric = PETSC_TRUE; 2119 B->symmetric_set = PETSC_TRUE; 2120 B->structurally_symmetric_set = PETSC_TRUE; 2121 2122 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 2123 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 2124 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save 
memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 2125 if (flg) { 2126 PetscReal fact = 1.39; 2127 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2128 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 2129 if (fact <= 1.0) fact = 1.39; 2130 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2131 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2132 } 2133 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2134 PetscFunctionReturn(0); 2135 } 2136 2137 /*MC 2138 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2139 2140 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 2141 and MATMPISBAIJ otherwise. 2142 2143 Options Database Keys: 2144 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2145 2146 Level: beginner 2147 2148 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 2149 M*/ 2150 2151 #undef __FUNCT__ 2152 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 2153 /*@C 2154 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2155 the user should preallocate the matrix storage by setting the parameters 2156 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2157 performance can be increased by more than a factor of 50. 2158 2159 Collective on Mat 2160 2161 Input Parameters: 2162 + B - the matrix 2163 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2164 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2165 . d_nz - number of block nonzeros per block row in diagonal portion of local 2166 submatrix (same for all local rows) 2167 . d_nnz - array containing the number of block nonzeros in the various block rows 2168 in the upper triangular and diagonal part of the diagonal portion of the local 2169 submatrix (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 2170 for the diagonal entry and set a value even if it is zero. 2171 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2172 submatrix (same for all local rows). 2173 - o_nnz - array containing the number of block nonzeros in the various block rows of the 2174 off-diagonal portion of the local submatrix that is right of the diagonal 2175 (possibly different for each block row) or NULL. 2176 2177 2178 Options Database Keys: 2179 . -mat_no_unroll - uses code that does not unroll the loops in the 2180 block calculations (much slower) 2181 . -mat_block_size - size of the blocks to use 2182 2183 Notes: 2184 2185 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2186 then it must be used on all processors that share the object for that argument. 2187 2188 If the *_nnz parameter is given then the *_nz parameter is ignored 2189 2190 Storage Information: 2191 For a square global matrix we define each processor's diagonal portion 2192 to be its local rows and the corresponding columns (a square submatrix); 2193 each processor's off-diagonal portion encompasses the remainder of the 2194 local matrix (a rectangular submatrix).
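
     As a minimal call-sequence sketch (the names comm, m, M, and bs are
     illustrative, and a block-tridiagonal structure is assumed, so each
     block row has at most 2 upper-triangular block nonzeros in the diagonal
     portion and at most 1 in the off-diagonal portion):

.vb
     MatCreate(comm,&B);
     MatSetSizes(B,m,m,M,M);
     MatSetType(B,MATMPISBAIJ);
     MatMPISBAIJSetPreallocation(B,bs,2,NULL,1,NULL);
.ve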
2195 2196 The user can specify preallocated storage for the diagonal part of 2197 the local submatrix with either d_nz or d_nnz (not both). Set 2198 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2199 memory allocation. Likewise, specify preallocated storage for the 2200 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2201 2202 You can call MatGetInfo() to get information on how effective the preallocation was; 2203 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2204 You can also run with the option -info and look for messages with the string 2205 malloc in them to see if additional memory allocation was needed. 2206 2207 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2208 the figure below we depict these three local rows and all columns (0-11). 2209 2210 .vb 2211 0 1 2 3 4 5 6 7 8 9 10 11 2212 -------------------------- 2213 row 3 |. . . d d d o o o o o o 2214 row 4 |. . . d d d o o o o o o 2215 row 5 |. . . d d d o o o o o o 2216 -------------------------- 2217 .ve 2218 2219 Thus, any entries in the d locations are stored in the d (diagonal) 2220 submatrix, and any entries in the o locations are stored in the 2221 o (off-diagonal) submatrix. Note that the d matrix is stored in 2222 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2223 2224 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2225 plus the diagonal part of the d matrix, 2226 and o_nz should indicate the number of block nonzeros per row in the o matrix 2227 2228 In general, for PDE problems in which most nonzeros are near the diagonal, 2229 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2230 or you will get TERRIBLE performance; see the users' manual chapter on 2231 matrices. 2232 2233 Level: intermediate 2234 2235 .keywords: matrix, block, aij, compressed row, sparse, parallel 2236 2237 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 2238 @*/ 2239 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2240 { 2241 PetscErrorCode ierr; 2242 2243 PetscFunctionBegin; 2244 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2245 PetscValidType(B,1); 2246 PetscValidLogicalCollectiveInt(B,bs,2); 2247 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 2248 PetscFunctionReturn(0); 2249 } 2250 2251 #undef __FUNCT__ 2252 #define __FUNCT__ "MatCreateSBAIJ" 2253 /*@C 2254 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2255 (block compressed row). For good matrix assembly performance 2256 the user should preallocate the matrix storage by setting the parameters 2257 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2258 performance can be increased by more than a factor of 50. 2259 2260 Collective on MPI_Comm 2261 2262 Input Parameters: 2263 + comm - MPI communicator 2264 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2265 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2266 . 
m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2267 This value should be the same as the local size used in creating the 2268 y vector for the matrix-vector product y = Ax. 2269 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2270 This value should be the same as the local size used in creating the 2271 x vector for the matrix-vector product y = Ax. 2272 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2273 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2274 . d_nz - number of block nonzeros per block row in diagonal portion of local 2275 submatrix (same for all local rows) 2276 . d_nnz - array containing the number of block nonzeros in the various block rows 2277 in the upper triangular portion of the diagonal portion of the local submatrix 2278 (possibly different for each block row) or NULL. 2279 If you plan to factor the matrix you must leave room for the diagonal entry and 2280 set its value even if it is zero. 2281 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2282 submatrix (same for all local rows). 2283 - o_nnz - array containing the number of block nonzeros in the various block rows of the 2284 off-diagonal portion of the local submatrix (possibly different for 2285 each block row) or NULL. 2286 2287 Output Parameter: 2288 . A - the matrix 2289 2290 Options Database Keys: 2291 . -mat_no_unroll - uses code that does not unroll the loops in the 2292 block calculations (much slower) 2293 . -mat_block_size - size of the blocks to use 2294 . -mat_mpi - use the parallel matrix data structures even on one processor 2295 (defaults to using SeqSBAIJ format on one processor) 2296 2297 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2298 MatXXXXSetPreallocation() paradigm instead of this routine directly. 2299 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2300 2301 Notes: 2302 The number of rows and columns must be divisible by blocksize. 2303 This matrix type does not support complex Hermitian operations. 2304 2305 The user MUST specify either the local or global matrix dimensions 2306 (possibly both). 2307 2308 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2309 then it must be used on all processors that share the object for that argument. 2310 2311 If the *_nnz parameter is given then the *_nz parameter is ignored 2312 2313 Storage Information: 2314 For a square global matrix we define each processor's diagonal portion 2315 to be its local rows and the corresponding columns (a square submatrix); 2316 each processor's off-diagonal portion encompasses the remainder of the 2317 local matrix (a rectangular submatrix). 2318 2319 The user can specify preallocated storage for the diagonal part of 2320 the local submatrix with either d_nz or d_nnz (not both). Set 2321 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2322 memory allocation. Likewise, specify preallocated storage for the 2323 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2324 2325 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2326 the figure below we depict these three local rows and all columns (0-11). 2327 2328 .vb 2329 0 1 2 3 4 5 6 7 8 9 10 11 2330 -------------------------- 2331 row 3 |. . . d d d o o o o o o 2332 row 4 |. . . d d d o o o o o o 2333 row 5 |. . .
d d d o o o o o o 2334 -------------------------- 2335 .ve 2336 2337 Thus, any entries in the d locations are stored in the d (diagonal) 2338 submatrix, and any entries in the o locations are stored in the 2339 o (off-diagonal) submatrix. Note that the d matrix is stored in 2340 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2341 2342 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2343 plus the diagonal part of the d matrix, 2344 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2345 In general, for PDE problems in which most nonzeros are near the diagonal, 2346 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2347 or you will get TERRIBLE performance; see the users' manual chapter on 2348 matrices. 2349 2350 Level: intermediate 2351 2352 .keywords: matrix, block, aij, compressed row, sparse, parallel 2353 2354 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2355 @*/ 2356 2357 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2358 { 2359 PetscErrorCode ierr; 2360 PetscMPIInt size; 2361 2362 PetscFunctionBegin; 2363 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2364 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2365 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2366 if (size > 1) { 2367 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2368 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2369 } else { 2370 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2371 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2372 } 2373 PetscFunctionReturn(0); 2374 } 2375 2376 2377 #undef __FUNCT__ 2378 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2379 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2380 { 2381 Mat mat; 2382 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2383 PetscErrorCode ierr; 2384 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2385 PetscScalar *array; 2386 2387 PetscFunctionBegin; 2388 *newmat = 0; 2389 2390 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2391 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2392 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2393 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2394 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2395 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2396 2397 mat->factortype = matin->factortype; 2398 mat->preallocated = PETSC_TRUE; 2399 mat->assembled = PETSC_TRUE; 2400 mat->insertmode = NOT_SET_VALUES; 2401 2402 a = (Mat_MPISBAIJ*)mat->data; 2403 a->bs2 = oldmat->bs2; 2404 a->mbs = oldmat->mbs; 2405 a->nbs = oldmat->nbs; 2406 a->Mbs = oldmat->Mbs; 2407 a->Nbs = oldmat->Nbs; 2408 2409 2410 a->size = oldmat->size; 2411 a->rank = oldmat->rank; 2412 a->donotstash = oldmat->donotstash; 2413 a->roworiented = oldmat->roworiented; 2414 a->rowindices = 0; 2415 a->rowvalues = 0; 2416 a->getrowactive = PETSC_FALSE; 2417 a->barray = 0; 2418 a->rstartbs = oldmat->rstartbs; 2419 a->rendbs = oldmat->rendbs; 2420 a->cstartbs = oldmat->cstartbs; 2421 a->cendbs = oldmat->cendbs; 2422 2423 /* hash table stuff */ 2424 a->ht = 0; 2425 a->hd = 0; 2426 a->ht_size = 0; 2427 a->ht_flag = oldmat->ht_flag; 2428 a->ht_fact 
= oldmat->ht_fact; 2429 a->ht_total_ct = 0; 2430 a->ht_insert_ct = 0; 2431 2432 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2433 if (oldmat->colmap) { 2434 #if defined(PETSC_USE_CTABLE) 2435 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2436 #else 2437 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 2438 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2439 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2440 #endif 2441 } else a->colmap = 0; 2442 2443 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2444 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2445 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2446 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2447 } else a->garray = 0; 2448 2449 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2450 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2451 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2452 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2453 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2454 2455 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2456 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2457 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2458 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2459 2460 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2461 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2462 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2463 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2464 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2465 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2466 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2467 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2468 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2469 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2470 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 2471 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 2472 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2473 2474 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2475 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2476 a->sMvctx = oldmat->sMvctx; 2477 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 2478 2479 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2480 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2481 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2482 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2483 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2484 *newmat = mat; 2485 PetscFunctionReturn(0); 2486 } 2487 2488 #undef __FUNCT__ 2489 #define 
__FUNCT__ "MatLoad_MPISBAIJ" 2490 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2491 { 2492 PetscErrorCode ierr; 2493 PetscInt i,nz,j,rstart,rend; 2494 PetscScalar *vals,*buf; 2495 MPI_Comm comm; 2496 MPI_Status status; 2497 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2498 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2499 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2500 PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows; 2501 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2502 PetscInt dcount,kmax,k,nzcount,tmp; 2503 int fd; 2504 2505 PetscFunctionBegin; 2506 /* force binary viewer to load .info file if it has not yet done so */ 2507 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2508 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2509 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2510 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2511 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2512 if (bs < 0) bs = 1; 2513 2514 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2515 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2516 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2517 if (!rank) { 2518 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2519 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2520 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2521 } 2522 2523 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2524 M = header[1]; 2525 N = header[2]; 2526 2527 /* If global sizes are set, check if they are consistent with that given in the file */ 2528 if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M); 2529 if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N); 2530 2531 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2532 2533 /* 2534 This code adds extra rows to make sure the number of rows is 2535 divisible by the blocksize 2536 */ 2537 Mbs = M/bs; 2538 extra_rows = bs - M + bs*(Mbs); 2539 if (extra_rows == bs) extra_rows = 0; 2540 else Mbs++; 2541 if (extra_rows &&!rank) { 2542 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2543 } 2544 2545 /* determine ownership of all rows */ 2546 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2547 mbs = Mbs/size + ((Mbs % size) > rank); 2548 m = mbs*bs; 2549 } else { /* User Set */ 2550 m = newmat->rmap->n; 2551 mbs = m/bs; 2552 } 2553 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 2554 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2555 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2556 rowners[0] = 0; 2557 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2558 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2559 rstart = rowners[rank]; 2560 rend = rowners[rank+1]; 2561 2562 /* distribute row lengths to all processors */ 2563 ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 2564 if (!rank) { 
2565 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 2566 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2567 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2568 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 2569 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2570 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2571 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2572 } else { 2573 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2574 } 2575 2576 if (!rank) { /* procs[0] */ 2577 /* calculate the number of nonzeros on each processor */ 2578 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 2579 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2580 for (i=0; i<size; i++) { 2581 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2582 procsnz[i] += rowlengths[j]; 2583 } 2584 } 2585 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2586 2587 /* determine max buffer needed and allocate it */ 2588 maxnz = 0; 2589 for (i=0; i<size; i++) { 2590 maxnz = PetscMax(maxnz,procsnz[i]); 2591 } 2592 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2593 2594 /* read in my part of the matrix column indices */ 2595 nz = procsnz[0]; 2596 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2597 mycols = ibuf; 2598 if (size == 1) nz -= extra_rows; 2599 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2600 if (size == 1) { 2601 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 2602 } 2603 2604 /* read in everyone else's part (except the last proc's) and ship it off */ 2605 for (i=1; i<size-1; i++) { 2606 nz = procsnz[i]; 2607 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2608 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2609 } 2610 /* read in the stuff for the last proc */ 2611 if (size != 1) { 2612 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2613 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2614 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2615 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2616 } 2617 ierr = PetscFree(cols);CHKERRQ(ierr); 2618 } else { /* procs[i], i>0 */ 2619 /* determine buffer space needed for message */ 2620 nz = 0; 2621 for (i=0; i<m; i++) nz += locrowlens[i]; 2622 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2623 mycols = ibuf; 2624 /* receive message of column indices */ 2625 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2626 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2627 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2628 } 2629 2630 /* loop over local rows, determining number of off diagonal entries */ 2631 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 2632 ierr = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 2633 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2634 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2635 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2636 rowcount = 0; 2637 nzcount = 0; 2638 for (i=0; i<mbs; i++) { 2639 dcount = 0; 2640 odcount = 0; 2641 for (j=0; j<bs; j++) { 2642 kmax = locrowlens[rowcount]; 2643 for (k=0; k<kmax; k++) { 2644 tmp = mycols[nzcount++]/bs; /* block col.
index */ 2645 if (!mask[tmp]) { 2646 mask[tmp] = 1; 2647 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2648 else masked1[dcount++] = tmp; /* entry in diag portion */ 2649 } 2650 } 2651 rowcount++; 2652 } 2653 2654 dlens[i] = dcount; /* d_nzz[i] */ 2655 odlens[i] = odcount; /* o_nzz[i] */ 2656 2657 /* zero out the mask elements we set */ 2658 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2659 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2660 } 2661 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2662 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2663 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2664 2665 if (!rank) { 2666 ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 2667 /* read in my part of the matrix numerical values */ 2668 nz = procsnz[0]; 2669 vals = buf; 2670 mycols = ibuf; 2671 if (size == 1) nz -= extra_rows; 2672 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2673 if (size == 1) { 2674 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 2675 } 2676 2677 /* insert into matrix */ 2678 jj = rstart*bs; 2679 for (i=0; i<m; i++) { 2680 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2681 mycols += locrowlens[i]; 2682 vals += locrowlens[i]; 2683 jj++; 2684 } 2685 2686 /* read in other processors (except the last one) and ship out */ 2687 for (i=1; i<size-1; i++) { 2688 nz = procsnz[i]; 2689 vals = buf; 2690 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2691 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2692 } 2693 /* the last proc */ 2694 if (size != 1) { 2695 nz = procsnz[i] - extra_rows; 2696 vals = buf; 2697 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2698 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2699 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2700 } 2701 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2702 2703 } else { 2704 /* receive numeric values */ 2705 ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 2706 2707 /* receive message of values*/ 2708 vals = buf; 2709 mycols = ibuf; 2710 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2711 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2712 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2713 2714 /* insert into matrix */ 2715 jj = rstart*bs; 2716 for (i=0; i<m; i++) { 2717 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2718 mycols += locrowlens[i]; 2719 vals += locrowlens[i]; 2720 jj++; 2721 } 2722 } 2723 2724 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2725 ierr = PetscFree(buf);CHKERRQ(ierr); 2726 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2727 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2728 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2729 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2730 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2731 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2732 PetscFunctionReturn(0); 2733 } 2734 2735 #undef __FUNCT__ 2736 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2737 /*XXXXX@ 2738 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2739 2740 Input Parameters: 2741 . mat - the matrix 2742 . 
fact - factor 2743 2744 Not Collective on Mat, each process can have a different hash factor 2745 2746 Level: advanced 2747 2748 Notes: 2749 This can also be set by the command line option: -mat_use_hash_table fact 2750 2751 .keywords: matrix, hashtable, factor, HT 2752 2753 .seealso: MatSetOption() 2754 @XXXXX*/ 2755 2756 2757 #undef __FUNCT__ 2758 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2759 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2760 { 2761 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2762 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2763 PetscReal atmp; 2764 PetscReal *work,*svalues,*rvalues; 2765 PetscErrorCode ierr; 2766 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2767 PetscMPIInt rank,size; 2768 PetscInt *rowners_bs,dest,count,source; 2769 PetscScalar *va; 2770 MatScalar *ba; 2771 MPI_Status stat; 2772 2773 PetscFunctionBegin; 2774 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2775 ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 2776 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2777 2778 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2779 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2780 2781 bs = A->rmap->bs; 2782 mbs = a->mbs; 2783 Mbs = a->Mbs; 2784 ba = b->a; 2785 bi = b->i; 2786 bj = b->j; 2787 2788 /* find ownerships */ 2789 rowners_bs = A->rmap->range; 2790 2791 /* each proc creates an array to be distributed */ 2792 ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr); 2793 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2794 2795 /* row_max for B */ 2796 if (rank != size-1) { 2797 for (i=0; i<mbs; i++) { 2798 ncols = bi[1] - bi[0]; bi++; 2799 brow = bs*i; 2800 for (j=0; j<ncols; j++) { 2801 bcol = bs*(*bj); 2802 for (kcol=0; kcol<bs; kcol++) { 2803 col = bcol + kcol; /* local col index */ 2804 col += rowners_bs[rank+1]; /* global col index */ 2805 for (krow=0; krow<bs; krow++) { 2806 atmp = PetscAbsScalar(*ba); ba++; 2807 row = brow + krow; /* local row index */ 2808 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2809 if (work[col] < atmp) work[col] = atmp; 2810 } 2811 } 2812 bj++; 2813 } 2814 } 2815 2816 /* send values to its owners */ 2817 for (dest=rank+1; dest<size; dest++) { 2818 svalues = work + rowners_bs[dest]; 2819 count = rowners_bs[dest+1]-rowners_bs[dest]; 2820 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2821 } 2822 } 2823 2824 /* receive values */ 2825 if (rank) { 2826 rvalues = work; 2827 count = rowners_bs[rank+1]-rowners_bs[rank]; 2828 for (source=0; source<rank; source++) { 2829 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 2830 /* process values */ 2831 for (i=0; i<count; i++) { 2832 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2833 } 2834 } 2835 } 2836 2837 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2838 ierr = PetscFree(work);CHKERRQ(ierr); 2839 PetscFunctionReturn(0); 2840 } 2841 2842 #undef __FUNCT__ 2843 #define __FUNCT__ "MatSOR_MPISBAIJ" 2844 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2845 { 2846 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2847 PetscErrorCode ierr; 2848 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2849 PetscScalar *x,*ptr,*from; 2850 Vec bb1; 2851 const PetscScalar *b; 2852 2853 PetscFunctionBegin; 2854 if (its <= 0 || lits <= 0) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2855 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2856 2857 if (flag == SOR_APPLY_UPPER) { 2858 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2859 PetscFunctionReturn(0); 2860 } 2861 2862 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2863 if (flag & SOR_ZERO_INITIAL_GUESS) { 2864 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2865 its--; 2866 } 2867 2868 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2869 while (its--) { 2870 2871 /* lower triangular part: slvec0b = - B^T*xx */ 2872 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2873 2874 /* copy xx into slvec0a */ 2875 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2876 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2877 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2878 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2879 2880 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2881 2882 /* copy bb into slvec1a */ 2883 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2884 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2885 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2886 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2887 2888 /* set slvec1b = 0 */ 2889 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2890 2891 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2892 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2893 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2894 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2895 2896 /* upper triangular part: bb1 = bb1 - B*x */ 2897 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2898 2899 /* local diagonal sweep */ 2900 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2901 } 2902 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2903 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2904 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2905 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2906 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2907 } else if (flag & SOR_EISENSTAT) { 2908 Vec xx1; 2909 PetscBool hasop; 2910 const PetscScalar *diag; 2911 PetscScalar *sl,scale = (omega - 2.0)/omega; 2912 PetscInt i,n; 2913 2914 if (!mat->xx1) { 2915 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2916 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2917 } 2918 xx1 = mat->xx1; 2919 bb1 = mat->bb1; 2920 2921 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2922 2923 if (!mat->diag) { 2924 /* this is wrong for same matrix with new nonzero values */ 2925 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 2926 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2927 } 2928 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2929 2930 if (hasop) { 2931 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2932 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2933 } else { 2934 /* 2935 These 
two lines are replaced by code that may be a bit faster for a good compiler 2936 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2937 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2938 */ 2939 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2940 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2941 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2942 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2943 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2944 if (omega == 1.0) { 2945 for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 2946 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2947 } else { 2948 for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 2949 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2950 } 2951 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2952 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2953 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2954 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2955 } 2956 2957 /* multiply off-diagonal portion of matrix */ 2958 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2959 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2960 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2961 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2962 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2963 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2964 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2965 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2966 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2967 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2968 2969 /* local sweep */ 2970 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2971 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2972 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2973 PetscFunctionReturn(0); 2974 } 2975 2976 #undef __FUNCT__ 2977 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2978 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2979 { 2980 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2981 PetscErrorCode ierr; 2982 Vec lvec1,bb1; 2983 2984 PetscFunctionBegin; 2985 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2986 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2987 2988 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2989 if (flag & SOR_ZERO_INITIAL_GUESS) { 2990 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2991 its--; 2992 } 2993 2994 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2995 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2996 while (its--) { 2997 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2998 2999 /* lower diagonal part: bb1 = bb - B^T*xx */ 3000 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 3001 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 3002 3003 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3004 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 3005 ierr = 
VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3006 3007 /* upper diagonal part: bb1 = bb1 - B*x */ 3008 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 3009 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 3010 3011 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3012 3013 /* diagonal sweep */ 3014 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 3015 } 3016 ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 3017 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 3018 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 3019 PetscFunctionReturn(0); 3020 } 3021 3022 #undef __FUNCT__ 3023 #define __FUNCT__ "MatCreateMPISBAIJWithArrays" 3024 /*@ 3025 MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows 3026 in standard CSR format. 3027 3028 Collective on MPI_Comm 3029 3030 Input Parameters: 3031 + comm - MPI communicator 3032 . bs - the block size, only a block size of 1 is supported 3033 . m - number of local rows (Cannot be PETSC_DECIDE) 3034 . n - This value should be the same as the local size used in creating the 3035 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3036 calculated if N is given) For square matrices n is almost always m. 3037 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3038 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3039 . i - row offsets into j[] and a[] (of length m+1; i[0] must be 0) 3040 . j - column indices 3041 - a - matrix values 3042 3043 Output Parameter: 3044 . mat - the matrix 3045 3046 Level: intermediate 3047 3048 Notes: 3049 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3050 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3051 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3052 3053 The i and j indices are 0 based, and the i offsets are relative to the start of the local j array. 3054 A small usage sketch appears in a comment at the end of this file. 3055 .keywords: matrix, aij, compressed row, sparse, parallel 3056 3057 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3058 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3059 @*/ 3060 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3061 { 3062 PetscErrorCode ierr; 3063 3064 3065 PetscFunctionBegin; 3066 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3067 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3068 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3069 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3070 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 3071 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 3072 PetscFunctionReturn(0); 3073 } 3074 3075 3076 #undef __FUNCT__ 3077 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR" 3078 /*@C 3079 MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format. 3080 3081 3082 Collective on MPI_Comm 3083 3084 Input Parameters: 3085 + B - the matrix 3086 . bs - the block size 3087 .
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
/*@C
   MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
   (symmetric block compressed row).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted within each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
@*/
PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
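/*
   Editor's sketch (not part of the original source): MatMPISBAIJSetPreallocationCSR()
   is normally reached through MatCreateMPISBAIJWithArrays() above, but it can be
   called directly once the type is set. Here v is passed as NULL so only the nonzero
   pattern is preallocated; values would then be inserted with MatSetValues() and the
   matrix assembled. One diagonal entry per rank; the helper name
   ExamplePreallocateSBAIJCSR is hypothetical.
*/
static PetscErrorCode ExamplePreallocateSBAIJCSR(MPI_Comm comm,Mat *C)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       ii[2],jj[1];

  PetscFunctionBegin;
  ierr  = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ii[0] = 0; ii[1] = 1;   /* one local row with one entry */
  jj[0] = (PetscInt)rank; /* diagonal entry: global column equals global row */
  ierr  = MatCreate(comm,C);CHKERRQ(ierr);
  ierr  = MatSetSizes(*C,1,1,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr  = MatSetType(*C,MATMPISBAIJ);CHKERRQ(ierr);
  ierr  = MatMPISBAIJSetPreallocationCSR(*C,1,ii,jj,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}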
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data;
    PetscInt     *dnz,*onz,sum,bs,cbs,mbs,Nbs;
    PetscInt     *bindx,rmax=a->rmax,j;

    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    mbs  = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
    }
    /* Check sum(n) = Nbs */
    ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs);

    ierr    = MPI_Scan(&mbs,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= mbs;

    ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
    ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<mbs; i++) {
      ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
      nnz  = nnz/bs;
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
    }
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscFree(bindx);CHKERRQ(ierr);

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);

  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
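/*
   Editor's sketch (not part of the original source): the routine above is the
   MPISBAIJ back end reached through the dispatching MatCreateMPIMatConcatenateSeqMat()
   interface, which stacks one sequential matrix per rank into a parallel matrix.
   A hedged illustration, assuming seqmat is an assembled SeqSBAIJ matrix on every
   rank of comm; the helper name ExampleConcatenateSeqSBAIJ is hypothetical.
*/
static PetscErrorCode ExampleConcatenateSeqSBAIJ(MPI_Comm comm,Mat seqmat,Mat *mpimat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* PETSC_DECIDE lets PetscSplitOwnership() choose the local column size */
  ierr = MatCreateMPIMatConcatenateSeqMat(comm,seqmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,mpimat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}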