#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    A->nonzerostate++;\
a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    B->nonzerostate++;\
b_noinsert:; \
    bilen[brow] = nrow; \
  }

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij       = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig = mat->rmap->rstart;
  PetscInt       rend_orig   = mat->rmap->rend,cstart_orig = mat->cmap->rstart;
  PetscInt       cend_orig   = mat->cmap->rend,bs = mat->rmap->bs;

  /* Some variables required in the macro */
  Mat          A      = baij->A;
  Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar    *aa    = a->a;

  Mat         B      = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j;
  MatScalar   *ba    = b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2 = a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 176 } 177 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 178 col = in[j] - cstart_orig; /* local col index */ 179 brow = row/bs; bcol = col/bs; 180 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 181 if (roworiented) value = v[i*n+j]; 182 else value = v[i+j*m]; 183 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 184 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 185 } else if (in[j] < 0) continue; 186 #if defined(PETSC_USE_DEBUG) 187 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 188 #endif 189 else { /* off-diag entry (B) */ 190 if (mat->was_assembled) { 191 if (!baij->colmap) { 192 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 193 } 194 #if defined(PETSC_USE_CTABLE) 195 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 196 col = col - 1; 197 #else 198 col = baij->colmap[in[j]/bs] - 1; 199 #endif 200 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 201 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 202 col = in[j]; 203 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 204 B = baij->B; 205 b = (Mat_SeqBAIJ*)(B)->data; 206 bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 207 ba = b->a; 208 } else col += in[j]%bs; 209 } else col = in[j]; 210 if (roworiented) value = v[i*n+j]; 211 else value = v[i+j*m]; 212 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 213 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 214 } 215 } 216 } else { /* off processor entry */ 217 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 218 if (!baij->donotstash) { 219 mat->assembled = PETSC_FALSE; 220 n_loc = 0; 221 for (j=0; j<n; j++) { 222 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 223 in_loc[n_loc] = in[j]; 224 if (roworiented) { 225 v_loc[n_loc] = v[i*n+j]; 226 } else { 227 v_loc[n_loc] = v[j*m+i]; 228 } 229 n_loc++; 230 } 231 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ_Inlined" 240 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 241 { 242 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 243 PetscErrorCode ierr; 244 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 245 PetscInt *imax =a->imax,*ai=a->i,*ailen=a->ilen; 246 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 247 PetscBool roworiented=a->roworiented; 248 const PetscScalar *value = v; 249 MatScalar *ap,*aa = a->a,*bap; 250 251 PetscFunctionBegin; 252 if (col < row) { 253 if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */ 254 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 255 } 256 rp = aj + ai[row]; 257 ap = aa + bs2*ai[row]; 258 rmax = imax[row]; 259 nrow = ailen[row]; 260 value = v; 261 low = 0; 262 high = nrow; 263 264 while (high-low > 7) { 265 t = (low+high)/2; 266 if (rp[t] > col) high = t; 267 else low = t; 268 } 269 for (i=low; i<high; i++) { 270 if (rp[i] > col) break; 271 if (rp[i] == col) { 272 bap = ap + bs2*i; 273 if (roworiented) { 274 if (is == ADD_VALUES) { 275 for (ii=0; ii<bs; ii++) { 276 for (jj=ii; jj<bs2; jj+=bs) { 277 bap[jj] += *value++; 278 } 279 } 280 } else { 281 for (ii=0; ii<bs; ii++) { 282 for (jj=ii; jj<bs2; jj+=bs) { 283 bap[jj] = *value++; 284 } 285 } 286 } 287 } else { 288 if (is == ADD_VALUES) { 289 for (ii=0; ii<bs; ii++) { 290 for (jj=0; jj<bs; jj++) { 291 *bap++ += *value++; 292 } 293 } 294 } else { 295 for (ii=0; ii<bs; ii++) { 296 for (jj=0; jj<bs; jj++) { 297 *bap++ = *value++; 298 } 299 } 300 } 301 } 302 goto noinsert2; 303 } 304 } 305 if (nonew == 1) goto noinsert2; 306 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol); 307 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 308 N = nrow++ - 1; high++; 309 /* shift up all the later entries in this row */ 310 for (ii=N; ii>=i; ii--) { 311 rp[ii+1] = rp[ii]; 312 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 313 } 314 if (N >= i) { 315 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 316 } 317 rp[i] = col; 318 bap = ap + bs2*i; 319 if (roworiented) { 320 for (ii=0; ii<bs; ii++) { 321 for (jj=ii; jj<bs2; jj+=bs) { 322 bap[jj] = *value++; 323 } 324 } 325 } else { 326 for (ii=0; ii<bs; ii++) { 327 for (jj=0; jj<bs; jj++) { 328 *bap++ = *value++; 329 } 330 } 331 } 332 noinsert2:; 333 ailen[row] = nrow; 334 PetscFunctionReturn(0); 335 } 336 337 #undef __FUNCT__ 338 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined" 339 /* 340 This routine is exactly duplicated in mpibaij.c 341 */ 342 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 343 { 344 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 345 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 346 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 347 PetscErrorCode ierr; 348 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 349 PetscBool roworiented=a->roworiented; 350 const PetscScalar *value = v; 351 MatScalar *ap,*aa = a->a,*bap; 352 353 PetscFunctionBegin; 354 rp = aj + ai[row]; 355 ap = aa + bs2*ai[row]; 356 rmax = imax[row]; 357 nrow = ailen[row]; 358 low = 0; 359 high = nrow; 360 value = v; 361 while (high-low > 7) { 362 t = (low+high)/2; 363 if (rp[t] > col) high = t; 364 else low = t; 365 } 366 for (i=low; i<high; i++) { 367 if (rp[i] > col) break; 368 if (rp[i] == col) { 369 bap = ap + bs2*i; 370 if (roworiented) { 371 if (is == ADD_VALUES) { 372 for (ii=0; ii<bs; ii++) { 373 for (jj=ii; jj<bs2; jj+=bs) { 374 bap[jj] += *value++; 375 } 376 } 377 } else { 378 for (ii=0; ii<bs; ii++) { 379 for (jj=ii; jj<bs2; jj+=bs) { 380 bap[jj] = *value++; 381 } 382 } 383 } 384 } else { 385 if (is == ADD_VALUES) { 386 for (ii=0; ii<bs; ii++,value+=bs) { 387 for (jj=0; jj<bs; jj++) { 388 bap[jj] += value[jj]; 389 } 390 bap += bs; 391 } 392 } else { 393 for (ii=0; ii<bs; ii++,value+=bs) { 394 for 
(jj=0; jj<bs; jj++) { 395 bap[jj] = value[jj]; 396 } 397 bap += bs; 398 } 399 } 400 } 401 goto noinsert2; 402 } 403 } 404 if (nonew == 1) goto noinsert2; 405 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol); 406 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 407 N = nrow++ - 1; high++; 408 /* shift up all the later entries in this row */ 409 for (ii=N; ii>=i; ii--) { 410 rp[ii+1] = rp[ii]; 411 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 412 } 413 if (N >= i) { 414 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 415 } 416 rp[i] = col; 417 bap = ap + bs2*i; 418 if (roworiented) { 419 for (ii=0; ii<bs; ii++) { 420 for (jj=ii; jj<bs2; jj+=bs) { 421 bap[jj] = *value++; 422 } 423 } 424 } else { 425 for (ii=0; ii<bs; ii++) { 426 for (jj=0; jj<bs; jj++) { 427 *bap++ = *value++; 428 } 429 } 430 } 431 noinsert2:; 432 ailen[row] = nrow; 433 PetscFunctionReturn(0); 434 } 435 436 #undef __FUNCT__ 437 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 438 /* 439 This routine could be optimized by removing the need for the block copy below and passing stride information 440 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 441 */ 442 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 443 { 444 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 445 const MatScalar *value; 446 MatScalar *barray =baij->barray; 447 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 448 PetscErrorCode ierr; 449 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 450 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 451 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 452 453 PetscFunctionBegin; 454 if (!barray) { 455 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 456 baij->barray = barray; 457 } 458 459 if (roworiented) { 460 stepval = (n-1)*bs; 461 } else { 462 stepval = (m-1)*bs; 463 } 464 for (i=0; i<m; i++) { 465 if (im[i] < 0) continue; 466 #if defined(PETSC_USE_DEBUG) 467 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1); 468 #endif 469 if (im[i] >= rstart && im[i] < rend) { 470 row = im[i] - rstart; 471 for (j=0; j<n; j++) { 472 if (im[i] > in[j]) { 473 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 474 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 475 } 476 /* If NumCol = 1 then a copy is not required */ 477 if ((roworiented) && (n == 1)) { 478 barray = (MatScalar*) v + i*bs2; 479 } else if ((!roworiented) && (m == 1)) { 480 barray = (MatScalar*) v + j*bs2; 481 } else { /* Here a copy is required */ 482 if (roworiented) { 483 value = v + i*(stepval+bs)*bs + j*bs; 484 } else { 485 value = v + j*(stepval+bs)*bs + i*bs; 486 } 487 for (ii=0; ii<bs; ii++,value+=stepval) { 488 for (jj=0; jj<bs; jj++) { 489 *barray++ = *value++; 490 } 491 } 492 barray -=bs2; 493 } 494 495 if (in[j] >= cstart && in[j] < cend) { 496 col = in[j] - cstart; 497 ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 498 } else if (in[j] < 0) continue; 499 #if defined(PETSC_USE_DEBUG) 500 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1); 501 #endif 502 else { 503 if (mat->was_assembled) { 504 if (!baij->colmap) { 505 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 506 } 507 508 #if defined(PETSC_USE_DEBUG) 509 #if defined(PETSC_USE_CTABLE) 510 { PetscInt data; 511 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 512 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 513 } 514 #else 515 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 516 #endif 517 #endif 518 #if defined(PETSC_USE_CTABLE) 519 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 520 col = (col - 1)/bs; 521 #else 522 col = (baij->colmap[in[j]] - 1)/bs; 523 #endif 524 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 525 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 526 col = in[j]; 527 } 528 } else col = in[j]; 529 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 530 } 531 } 532 } else { 533 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 534 if (!baij->donotstash) { 535 if (roworiented) { 536 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 537 } else { 538 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 539 } 540 } 541 } 542 } 543 PetscFunctionReturn(0); 544 } 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatGetValues_MPISBAIJ" 548 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 549 { 550 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 551 PetscErrorCode ierr; 552 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 553 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 554 555 PetscFunctionBegin; 556 for (i=0; i<m; i++) { 557 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 558 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 559 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 560 row = idxm[i] - bsrstart; 561 for (j=0; j<n; j++) { 562 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 563 if 
(idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 564 if (idxn[j] >= bscstart && idxn[j] < bscend) { 565 col = idxn[j] - bscstart; 566 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 567 } else { 568 if (!baij->colmap) { 569 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 570 } 571 #if defined(PETSC_USE_CTABLE) 572 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 573 data--; 574 #else 575 data = baij->colmap[idxn[j]/bs]-1; 576 #endif 577 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 578 else { 579 col = data + idxn[j]%bs; 580 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 581 } 582 } 583 } 584 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 585 } 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatNorm_MPISBAIJ" 591 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 592 { 593 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 594 PetscErrorCode ierr; 595 PetscReal sum[2],*lnorm2; 596 597 PetscFunctionBegin; 598 if (baij->size == 1) { 599 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 600 } else { 601 if (type == NORM_FROBENIUS) { 602 ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr); 603 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 604 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 605 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 606 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 607 ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 608 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 609 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 610 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 611 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 612 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 613 PetscReal *rsum,*rsum2,vabs; 614 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 615 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 616 MatScalar *v; 617 618 ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr); 619 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 620 /* Amat */ 621 v = amat->a; jj = amat->j; 622 for (brow=0; brow<mbs; brow++) { 623 grow = bs*(rstart + brow); 624 nz = amat->i[brow+1] - amat->i[brow]; 625 for (bcol=0; bcol<nz; bcol++) { 626 gcol = bs*(rstart + *jj); jj++; 627 for (col=0; col<bs; col++) { 628 for (row=0; row<bs; row++) { 629 vabs = PetscAbsScalar(*v); v++; 630 rsum[gcol+col] += vabs; 631 /* non-diagonal block */ 632 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 633 } 634 } 635 } 636 } 637 /* Bmat */ 638 v = bmat->a; jj = bmat->j; 639 for (brow=0; brow<mbs; brow++) { 640 grow = bs*(rstart + brow); 641 nz = bmat->i[brow+1] - bmat->i[brow]; 642 for (bcol=0; bcol<nz; bcol++) { 643 gcol = bs*garray[*jj]; jj++; 644 for (col=0; col<bs; col++) { 645 for (row=0; row<bs; row++) { 646 vabs = PetscAbsScalar(*v); v++; 647 rsum[gcol+col] += vabs; 648 rsum[grow+row] += vabs; 649 } 650 } 651 } 652 } 653 ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 654 *norm = 0.0; 655 for (col=0; col<mat->cmap->N; col++) { 656 if (rsum2[col] > *norm) *norm = rsum2[col]; 657 } 658 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 659 } else 
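/* Only NORM_FROBENIUS, NORM_1 and NORM_INFINITY are handled above; any other
   norm type falls through to the SETERRQ that follows.  Note that the
   Frobenius reduction above takes sqrt(sum[0] + 2*sum[1]) because each block
   of the off-diagonal part B is stored on only one process but stands for a
   pair of symmetric entries of the global matrix. */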
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 660 } 661 PetscFunctionReturn(0); 662 } 663 664 #undef __FUNCT__ 665 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 666 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 667 { 668 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 669 PetscErrorCode ierr; 670 PetscInt nstash,reallocs; 671 672 PetscFunctionBegin; 673 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 674 675 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 676 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 677 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 678 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 679 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 680 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 681 PetscFunctionReturn(0); 682 } 683 684 #undef __FUNCT__ 685 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 686 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 687 { 688 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 689 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data; 690 PetscErrorCode ierr; 691 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 692 PetscInt *row,*col; 693 PetscBool other_disassembled; 694 PetscMPIInt n; 695 PetscBool r1,r2,r3; 696 MatScalar *val; 697 698 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 699 PetscFunctionBegin; 700 if (!baij->donotstash && !mat->nooffprocentries) { 701 while (1) { 702 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 703 if (!flg) break; 704 705 for (i=0; i<n;) { 706 /* Now identify the consecutive vals belonging to the same row */ 707 for (j=i,rstart=row[j]; j<n; j++) { 708 if (row[j] != rstart) break; 709 } 710 if (j < n) ncols = j-i; 711 else ncols = n-i; 712 /* Now assemble all these values with a single function call */ 713 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 714 i = j; 715 } 716 } 717 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 718 /* Now process the block-stash. 
Since the values are stashed column-oriented, 719 set the roworiented flag to column oriented, and after MatSetValues() 720 restore the original flags */ 721 r1 = baij->roworiented; 722 r2 = a->roworiented; 723 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 724 725 baij->roworiented = PETSC_FALSE; 726 a->roworiented = PETSC_FALSE; 727 728 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 729 while (1) { 730 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 731 if (!flg) break; 732 733 for (i=0; i<n;) { 734 /* Now identify the consecutive vals belonging to the same row */ 735 for (j=i,rstart=row[j]; j<n; j++) { 736 if (row[j] != rstart) break; 737 } 738 if (j < n) ncols = j-i; 739 else ncols = n-i; 740 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr); 741 i = j; 742 } 743 } 744 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 745 746 baij->roworiented = r1; 747 a->roworiented = r2; 748 749 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 750 } 751 752 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 753 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 754 755 /* determine if any processor has disassembled, if so we must 756 also disassemble ourselfs, in order that we may reassemble. */ 757 /* 758 if nonzero structure of submatrix B cannot change then we know that 759 no processor disassembled thus we can skip this stuff 760 */ 761 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 762 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 763 if (mat->was_assembled && !other_disassembled) { 764 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 765 } 766 } 767 768 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 769 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 770 } 771 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 772 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 773 774 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 775 776 baij->rowvalues = 0; 777 778 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 779 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 780 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 781 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 782 } 783 PetscFunctionReturn(0); 784 } 785 786 extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer); 787 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 788 #include <petscdraw.h> 789 #undef __FUNCT__ 790 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 791 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 792 { 793 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 794 PetscErrorCode ierr; 795 PetscInt bs = mat->rmap->bs; 796 PetscMPIInt rank = baij->rank; 797 PetscBool iascii,isdraw; 798 PetscViewer sviewer; 799 PetscViewerFormat format; 800 801 PetscFunctionBegin; 802 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 803 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 804 if (iascii) { 805 ierr = 
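/* ASCII viewing: for PETSC_VIEWER_ASCII_INFO_DETAIL each rank reports the
   nonzero counts of its diagonal (A) and off-diagonal (B) parts and the
   VecScatter used in MatMult; for plain ASCII output the whole matrix is
   assembled onto the first process further below.  A typical way to trigger
   this (sketch, assuming the usual command-line viewer options) is
       -mat_view ::ascii_info_detail
   or a direct call to MatView(mat,PETSC_VIEWER_STDOUT_WORLD). */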
PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 806 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 807 MatInfo info; 808 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 809 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 810 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 811 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 812 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 813 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 814 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 815 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 816 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 817 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 818 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 819 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 820 PetscFunctionReturn(0); 821 } else if (format == PETSC_VIEWER_ASCII_INFO) { 822 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 823 PetscFunctionReturn(0); 824 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 825 PetscFunctionReturn(0); 826 } 827 } 828 829 if (isdraw) { 830 PetscDraw draw; 831 PetscBool isnull; 832 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 833 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 834 } 835 836 { 837 /* assemble the entire matrix onto first processor. */ 838 Mat A; 839 Mat_SeqSBAIJ *Aloc; 840 Mat_SeqBAIJ *Bloc; 841 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 842 MatScalar *a; 843 const char *matname; 844 845 /* Should this be the same type as mat? 
*/ 846 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 847 if (!rank) { 848 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 849 } else { 850 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 851 } 852 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 853 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 854 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 855 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 856 857 /* copy over the A part */ 858 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 859 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 860 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 861 862 for (i=0; i<mbs; i++) { 863 rvals[0] = bs*(baij->rstartbs + i); 864 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 865 for (j=ai[i]; j<ai[i+1]; j++) { 866 col = (baij->cstartbs+aj[j])*bs; 867 for (k=0; k<bs; k++) { 868 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 869 col++; 870 a += bs; 871 } 872 } 873 } 874 /* copy over the B part */ 875 Bloc = (Mat_SeqBAIJ*)baij->B->data; 876 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 877 for (i=0; i<mbs; i++) { 878 879 rvals[0] = bs*(baij->rstartbs + i); 880 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 881 for (j=ai[i]; j<ai[i+1]; j++) { 882 col = baij->garray[aj[j]]*bs; 883 for (k=0; k<bs; k++) { 884 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 885 col++; 886 a += bs; 887 } 888 } 889 } 890 ierr = PetscFree(rvals);CHKERRQ(ierr); 891 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 892 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 893 /* 894 Everyone has to call to draw the matrix since the graphics waits are 895 synchronized across all processors that share the PetscDraw object 896 */ 897 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 898 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 899 if (!rank) { 900 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 901 ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 902 } 903 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 904 ierr = MatDestroy(&A);CHKERRQ(ierr); 905 } 906 PetscFunctionReturn(0); 907 } 908 909 #undef __FUNCT__ 910 #define __FUNCT__ "MatView_MPISBAIJ" 911 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 912 { 913 PetscErrorCode ierr; 914 PetscBool iascii,isdraw,issocket,isbinary; 915 916 PetscFunctionBegin; 917 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 918 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 919 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 920 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 921 if (iascii || isdraw || issocket || isbinary) { 922 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 923 } 924 PetscFunctionReturn(0); 925 } 926 927 #undef __FUNCT__ 928 #define __FUNCT__ "MatDestroy_MPISBAIJ" 929 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 930 { 931 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 932 PetscErrorCode ierr; 933 934 PetscFunctionBegin; 935 #if defined(PETSC_USE_LOG) 936 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 937 #endif 938 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 939 ierr = 
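/* Tear down everything the parallel SBAIJ matrix owns: the element and block
   stashes, the sequential diagonal (A) and off-diagonal (B) blocks, the
   column map and garray, and the work vectors/scatters (lvec, Mvctx,
   slvec0/slvec1, sMvctx) used by the symmetric MatMult routines. */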
MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 940 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 941 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 942 #if defined(PETSC_USE_CTABLE) 943 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 944 #else 945 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 946 #endif 947 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 948 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 949 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 950 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 951 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 952 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 953 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 954 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 955 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 956 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 957 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 958 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 959 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 960 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 961 ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 962 #if defined(PETSC_USE_REAL_MAT_SINGLE) 963 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 964 #endif 965 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 966 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 967 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 968 ierr = PetscFree(mat->data);CHKERRQ(ierr); 969 970 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 971 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 972 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 973 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 974 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 975 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr); 976 #if defined(PETSC_HAVE_ELEMENTAL) 977 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr); 978 #endif 979 PetscFunctionReturn(0); 980 } 981 982 #undef __FUNCT__ 983 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian" 984 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 985 { 986 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 987 PetscErrorCode ierr; 988 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 989 PetscScalar *from; 990 const PetscScalar *x; 991 992 PetscFunctionBegin; 993 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 994 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 995 996 /* diagonal part */ 997 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 998 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 999 1000 /* subdiagonal part */ 1001 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1002 1003 /* copy x into the vec slvec0 */ 1004 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1005 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1006 1007 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1008 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1009 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1010 1011 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1012 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1013 /* supperdiagonal part */ 1014 ierr = 
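/* Final step of yy = A*x for the Hermitian case (in outline): slvec1a already
   holds the local diagonal-block product plus the scattered-in remote
   contributions of B^H*x, and slvec1b holds the gathered off-process pieces
   of x, so yy = B*slvec1b + slvec1a completes the product. */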
(*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1015 PetscFunctionReturn(0); 1016 } 1017 1018 #undef __FUNCT__ 1019 #define __FUNCT__ "MatMult_MPISBAIJ" 1020 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 1021 { 1022 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1023 PetscErrorCode ierr; 1024 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 1025 PetscScalar *from; 1026 const PetscScalar *x; 1027 1028 PetscFunctionBegin; 1029 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1030 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1031 1032 /* diagonal part */ 1033 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1034 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1035 1036 /* subdiagonal part */ 1037 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1038 1039 /* copy x into the vec slvec0 */ 1040 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1041 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1042 1043 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1044 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1045 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1046 1047 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1048 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1049 /* supperdiagonal part */ 1050 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1051 PetscFunctionReturn(0); 1052 } 1053 1054 #undef __FUNCT__ 1055 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 1056 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 1057 { 1058 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1059 PetscErrorCode ierr; 1060 PetscInt nt; 1061 1062 PetscFunctionBegin; 1063 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1064 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1065 1066 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1067 if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1068 1069 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1070 /* do diagonal part */ 1071 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1072 /* do supperdiagonal part */ 1073 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1074 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1075 /* do subdiagonal part */ 1076 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1077 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1078 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1079 PetscFunctionReturn(0); 1080 } 1081 1082 #undef __FUNCT__ 1083 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 1084 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1085 { 1086 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1087 PetscErrorCode ierr; 1088 PetscInt mbs=a->mbs,bs=A->rmap->bs; 1089 PetscScalar *from,zero=0.0; 1090 const PetscScalar *x; 1091 1092 PetscFunctionBegin; 1093 /* 1094 PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n"); 1095 PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT); 1096 */ 1097 /* diagonal part */ 1098 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1099 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 1100 1101 /* subdiagonal part */ 1102 ierr 
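/* MatMultAdd follows the same slvec0/slvec1 scheme as MatMult_MPISBAIJ above;
   the only difference is that yy was already folded into slvec1a by the
   diagonal-block multadd, so the final B*slvec1b + slvec1a below yields
   zz = A*x + yy. */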
= (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1103 1104 /* copy x into the vec slvec0 */ 1105 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1106 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1107 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1108 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1109 1110 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1111 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1112 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1113 1114 /* supperdiagonal part */ 1115 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1116 PetscFunctionReturn(0); 1117 } 1118 1119 #undef __FUNCT__ 1120 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 1121 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1122 { 1123 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1124 PetscErrorCode ierr; 1125 1126 PetscFunctionBegin; 1127 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1128 /* do diagonal part */ 1129 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1130 /* do supperdiagonal part */ 1131 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1132 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1133 1134 /* do subdiagonal part */ 1135 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1136 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1137 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1138 PetscFunctionReturn(0); 1139 } 1140 1141 /* 1142 This only works correctly for square matrices where the subblock A->A is the 1143 diagonal block 1144 */ 1145 #undef __FUNCT__ 1146 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 1147 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1148 { 1149 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1150 PetscErrorCode ierr; 1151 1152 PetscFunctionBegin; 1153 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1154 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1155 PetscFunctionReturn(0); 1156 } 1157 1158 #undef __FUNCT__ 1159 #define __FUNCT__ "MatScale_MPISBAIJ" 1160 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1161 { 1162 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1163 PetscErrorCode ierr; 1164 1165 PetscFunctionBegin; 1166 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1167 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1168 PetscFunctionReturn(0); 1169 } 1170 1171 #undef __FUNCT__ 1172 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1173 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1174 { 1175 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1176 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1177 PetscErrorCode ierr; 1178 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1179 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1180 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1181 1182 PetscFunctionBegin; 1183 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1184 mat->getrowactive = PETSC_TRUE; 1185 1186 if (!mat->rowvalues && (idx || v)) { 1187 /* 1188 allocate enough space to hold information from the longest row. 
1189 */ 1190 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1191 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1192 PetscInt max = 1,mbs = mat->mbs,tmp; 1193 for (i=0; i<mbs; i++) { 1194 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1195 if (max < tmp) max = tmp; 1196 } 1197 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1198 } 1199 1200 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1201 lrow = row - brstart; /* local row index */ 1202 1203 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1204 if (!v) {pvA = 0; pvB = 0;} 1205 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1206 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1207 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1208 nztot = nzA + nzB; 1209 1210 cmap = mat->garray; 1211 if (v || idx) { 1212 if (nztot) { 1213 /* Sort by increasing column numbers, assuming A and B already sorted */ 1214 PetscInt imark = -1; 1215 if (v) { 1216 *v = v_p = mat->rowvalues; 1217 for (i=0; i<nzB; i++) { 1218 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1219 else break; 1220 } 1221 imark = i; 1222 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1223 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1224 } 1225 if (idx) { 1226 *idx = idx_p = mat->rowindices; 1227 if (imark > -1) { 1228 for (i=0; i<imark; i++) { 1229 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1230 } 1231 } else { 1232 for (i=0; i<nzB; i++) { 1233 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1234 else break; 1235 } 1236 imark = i; 1237 } 1238 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1239 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1240 } 1241 } else { 1242 if (idx) *idx = 0; 1243 if (v) *v = 0; 1244 } 1245 } 1246 *nz = nztot; 1247 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1248 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #undef __FUNCT__ 1253 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1254 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1255 { 1256 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1257 1258 PetscFunctionBegin; 1259 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1260 baij->getrowactive = PETSC_FALSE; 1261 PetscFunctionReturn(0); 1262 } 1263 1264 #undef __FUNCT__ 1265 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1266 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1267 { 1268 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1269 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1270 1271 PetscFunctionBegin; 1272 aA->getrow_utriangular = PETSC_TRUE; 1273 PetscFunctionReturn(0); 1274 } 1275 #undef __FUNCT__ 1276 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1277 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1278 { 1279 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1280 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1281 1282 PetscFunctionBegin; 1283 aA->getrow_utriangular = PETSC_FALSE; 1284 PetscFunctionReturn(0); 1285 } 1286 1287 #undef __FUNCT__ 1288 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1289 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1290 { 1291 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1292 PetscErrorCode ierr; 1293 1294 PetscFunctionBegin; 1295 ierr = 
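/* Like most entry-wise operations on MPISBAIJ (MatImaginaryPart, MatScale,
   MatZeroEntries), taking the real part simply delegates to the two local
   blocks: the SeqSBAIJ diagonal part a->A and the SeqBAIJ off-diagonal part
   a->B. */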
MatRealPart(a->A);CHKERRQ(ierr); 1296 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1297 PetscFunctionReturn(0); 1298 } 1299 1300 #undef __FUNCT__ 1301 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1302 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1303 { 1304 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1305 PetscErrorCode ierr; 1306 1307 PetscFunctionBegin; 1308 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1309 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1310 PetscFunctionReturn(0); 1311 } 1312 1313 /* Check if isrow is a subset of iscol_local, called by MatGetSubMatrix_MPISBAIJ() 1314 Input: isrow - distributed(parallel), 1315 iscol_local - locally owned (seq) 1316 */ 1317 #undef __FUNCT__ 1318 #define __FUNCT__ "ISEqual_private" 1319 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg) 1320 { 1321 PetscErrorCode ierr; 1322 PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch; 1323 const PetscInt *ptr1,*ptr2; 1324 1325 PetscFunctionBegin; 1326 ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr); 1327 ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr); 1328 if (sz1 > sz2) { 1329 *flg = PETSC_FALSE; 1330 PetscFunctionReturn(0); 1331 } 1332 1333 ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr); 1334 ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1335 1336 ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr); 1337 ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr); 1338 ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr); 1339 ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr); 1340 ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr); 1341 ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr); 1342 1343 nmatch=0; 1344 k = 0; 1345 for (i=0; i<sz1; i++){ 1346 for (j=k; j<sz2; j++){ 1347 if (a1[i] == a2[j]) { 1348 k = j; nmatch++; 1349 break; 1350 } 1351 } 1352 } 1353 ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr); 1354 ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1355 ierr = PetscFree(a1);CHKERRQ(ierr); 1356 ierr = PetscFree(a2);CHKERRQ(ierr); 1357 if (nmatch < sz1) { 1358 *flg = PETSC_FALSE; 1359 } else { 1360 *flg = PETSC_TRUE; 1361 } 1362 PetscFunctionReturn(0); 1363 } 1364 1365 #undef __FUNCT__ 1366 #define __FUNCT__ "MatGetSubMatrix_MPISBAIJ" 1367 PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1368 { 1369 PetscErrorCode ierr; 1370 IS iscol_local; 1371 PetscInt csize; 1372 PetscBool isequal; 1373 1374 PetscFunctionBegin; 1375 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1376 if (call == MAT_REUSE_MATRIX) { 1377 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1378 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1379 } else { 1380 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1381 ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr); 1382 if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow"); 1383 } 1384 1385 /* now call MatGetSubMatrix_MPIBAIJ() */ 1386 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1387 if (call == MAT_INITIAL_MATRIX) { 1388 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1389 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1390 } 1391 PetscFunctionReturn(0); 1392 } 1393 1394 #undef __FUNCT__ 1395 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1396 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1397 { 1398 Mat_MPISBAIJ *l = 
(Mat_MPISBAIJ*)A->data; 1399 PetscErrorCode ierr; 1400 1401 PetscFunctionBegin; 1402 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1403 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1404 PetscFunctionReturn(0); 1405 } 1406 1407 #undef __FUNCT__ 1408 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1409 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1410 { 1411 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1412 Mat A = a->A,B = a->B; 1413 PetscErrorCode ierr; 1414 PetscReal isend[5],irecv[5]; 1415 1416 PetscFunctionBegin; 1417 info->block_size = (PetscReal)matin->rmap->bs; 1418 1419 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1420 1421 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1422 isend[3] = info->memory; isend[4] = info->mallocs; 1423 1424 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1425 1426 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1427 isend[3] += info->memory; isend[4] += info->mallocs; 1428 if (flag == MAT_LOCAL) { 1429 info->nz_used = isend[0]; 1430 info->nz_allocated = isend[1]; 1431 info->nz_unneeded = isend[2]; 1432 info->memory = isend[3]; 1433 info->mallocs = isend[4]; 1434 } else if (flag == MAT_GLOBAL_MAX) { 1435 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1436 1437 info->nz_used = irecv[0]; 1438 info->nz_allocated = irecv[1]; 1439 info->nz_unneeded = irecv[2]; 1440 info->memory = irecv[3]; 1441 info->mallocs = irecv[4]; 1442 } else if (flag == MAT_GLOBAL_SUM) { 1443 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1444 1445 info->nz_used = irecv[0]; 1446 info->nz_allocated = irecv[1]; 1447 info->nz_unneeded = irecv[2]; 1448 info->memory = irecv[3]; 1449 info->mallocs = irecv[4]; 1450 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1451 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1452 info->fill_ratio_needed = 0; 1453 info->factor_mallocs = 0; 1454 PetscFunctionReturn(0); 1455 } 1456 1457 #undef __FUNCT__ 1458 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1459 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1460 { 1461 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1462 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1463 PetscErrorCode ierr; 1464 1465 PetscFunctionBegin; 1466 switch (op) { 1467 case MAT_NEW_NONZERO_LOCATIONS: 1468 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1469 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1470 case MAT_KEEP_NONZERO_PATTERN: 1471 case MAT_NEW_NONZERO_LOCATION_ERR: 1472 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1473 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1474 break; 1475 case MAT_ROW_ORIENTED: 1476 a->roworiented = flg; 1477 1478 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1479 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1480 break; 1481 case MAT_NEW_DIAGONALS: 1482 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1483 break; 1484 case MAT_IGNORE_OFF_PROC_ENTRIES: 1485 a->donotstash = flg; 1486 break; 1487 case MAT_USE_HASH_TABLE: 1488 a->ht_flag = flg; 1489 break; 1490 case MAT_HERMITIAN: 1491 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1492 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1493 1494 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1495 break; 1496 case MAT_SPD: 1497 A->spd_set = PETSC_TRUE; 1498 A->spd = flg; 1499 if (flg) { 
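/* MAT_SPD with flg = PETSC_TRUE also implies the symmetry flags set just
   below.  A typical SBAIJ-specific call from user code (sketch) is
       MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);
   which makes MatSetValues() silently drop lower-triangular entries instead
   of raising the PETSC_ERR_USER error used elsewhere in this file. */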
1500 A->symmetric = PETSC_TRUE; 1501 A->structurally_symmetric = PETSC_TRUE; 1502 A->symmetric_set = PETSC_TRUE; 1503 A->structurally_symmetric_set = PETSC_TRUE; 1504 } 1505 break; 1506 case MAT_SYMMETRIC: 1507 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1508 break; 1509 case MAT_STRUCTURALLY_SYMMETRIC: 1510 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1511 break; 1512 case MAT_SYMMETRY_ETERNAL: 1513 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1514 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1515 break; 1516 case MAT_IGNORE_LOWER_TRIANGULAR: 1517 aA->ignore_ltriangular = flg; 1518 break; 1519 case MAT_ERROR_LOWER_TRIANGULAR: 1520 aA->ignore_ltriangular = flg; 1521 break; 1522 case MAT_GETROW_UPPERTRIANGULAR: 1523 aA->getrow_utriangular = flg; 1524 break; 1525 default: 1526 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1527 } 1528 PetscFunctionReturn(0); 1529 } 1530 1531 #undef __FUNCT__ 1532 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1533 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1534 { 1535 PetscErrorCode ierr; 1536 1537 PetscFunctionBegin; 1538 if (MAT_INITIAL_MATRIX || *B != A) { 1539 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1540 } 1541 PetscFunctionReturn(0); 1542 } 1543 1544 #undef __FUNCT__ 1545 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1546 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1547 { 1548 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1549 Mat a = baij->A, b=baij->B; 1550 PetscErrorCode ierr; 1551 PetscInt nv,m,n; 1552 PetscBool flg; 1553 1554 PetscFunctionBegin; 1555 if (ll != rr) { 1556 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1557 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1558 } 1559 if (!ll) PetscFunctionReturn(0); 1560 1561 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1562 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1563 1564 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1565 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1566 1567 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1568 1569 /* left diagonalscale the off-diagonal part */ 1570 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1571 1572 /* scale the diagonal part */ 1573 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1574 1575 /* right diagonalscale the off-diagonal part */ 1576 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1577 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1578 PetscFunctionReturn(0); 1579 } 1580 1581 #undef __FUNCT__ 1582 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1583 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1584 { 1585 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1586 PetscErrorCode ierr; 1587 1588 PetscFunctionBegin; 1589 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1590 PetscFunctionReturn(0); 1591 } 1592 1593 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1594 1595 #undef __FUNCT__ 1596 #define __FUNCT__ "MatEqual_MPISBAIJ" 1597 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1598 { 1599 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1600 Mat a,b,c,d; 1601 PetscBool flg; 1602 PetscErrorCode ierr; 1603 1604 
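/* Equality of two MPISBAIJ matrices is decided per process by comparing the
   local diagonal blocks (matA->A vs matB->A) and off-diagonal blocks
   (matA->B vs matB->B), then combining the local answers with a logical-AND
   reduction over the matrix communicator. */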
PetscFunctionBegin; 1605 a = matA->A; b = matA->B; 1606 c = matB->A; d = matB->B; 1607 1608 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1609 if (flg) { 1610 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1611 } 1612 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1613 PetscFunctionReturn(0); 1614 } 1615 1616 #undef __FUNCT__ 1617 #define __FUNCT__ "MatCopy_MPISBAIJ" 1618 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1619 { 1620 PetscErrorCode ierr; 1621 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1622 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1623 1624 PetscFunctionBegin; 1625 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1626 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1627 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1628 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1629 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1630 } else { 1631 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1632 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1633 } 1634 PetscFunctionReturn(0); 1635 } 1636 1637 #undef __FUNCT__ 1638 #define __FUNCT__ "MatSetUp_MPISBAIJ" 1639 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1640 { 1641 PetscErrorCode ierr; 1642 1643 PetscFunctionBegin; 1644 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1645 PetscFunctionReturn(0); 1646 } 1647 1648 #undef __FUNCT__ 1649 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1650 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1651 { 1652 PetscErrorCode ierr; 1653 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1654 PetscBLASInt bnz,one=1; 1655 Mat_SeqSBAIJ *xa,*ya; 1656 Mat_SeqBAIJ *xb,*yb; 1657 1658 PetscFunctionBegin; 1659 if (str == SAME_NONZERO_PATTERN) { 1660 PetscScalar alpha = a; 1661 xa = (Mat_SeqSBAIJ*)xx->A->data; 1662 ya = (Mat_SeqSBAIJ*)yy->A->data; 1663 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1664 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1665 xb = (Mat_SeqBAIJ*)xx->B->data; 1666 yb = (Mat_SeqBAIJ*)yy->B->data; 1667 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1668 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1669 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1670 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1671 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1672 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1673 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 1674 } else { 1675 Mat B; 1676 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1677 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1678 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1679 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 1680 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1681 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1682 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1683 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1684 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1685 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1686 ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 1687 ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1688 ierr 
= MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1689 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1690 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1691 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 1692 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1693 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1694 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1695 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1696 } 1697 PetscFunctionReturn(0); 1698 } 1699 1700 #undef __FUNCT__ 1701 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1702 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1703 { 1704 PetscErrorCode ierr; 1705 PetscInt i; 1706 PetscBool flg,sorted; 1707 1708 PetscFunctionBegin; 1709 for (i = 0; i < n; i++) { 1710 ierr = ISSorted(irow[i],&sorted);CHKERRQ(ierr); 1711 if (!sorted) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row index set %d not sorted",i); 1712 ierr = ISSorted(icol[i],&sorted);CHKERRQ(ierr); 1713 if (!sorted) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Column index set %d not sorted",i); 1714 } 1715 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1716 for (i=0; i<n; i++) { 1717 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1718 if (!flg) { /* *B[i] is non-symmetric, set flag */ 1719 ierr = MatSetOption(*B[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr); 1720 } 1721 } 1722 PetscFunctionReturn(0); 1723 } 1724 1725 #undef __FUNCT__ 1726 #define __FUNCT__ "MatShift_MPISBAIJ" 1727 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a) 1728 { 1729 PetscErrorCode ierr; 1730 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data; 1731 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data,*bij = (Mat_SeqSBAIJ*)maij->B->data; 1732 1733 PetscFunctionBegin; 1734 if (!aij->nz && !bij->nz) { 1735 ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 1736 } 1737 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1738 PetscFunctionReturn(0); 1739 } 1740 1741 /* -------------------------------------------------------------------*/ 1742 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1743 MatGetRow_MPISBAIJ, 1744 MatRestoreRow_MPISBAIJ, 1745 MatMult_MPISBAIJ, 1746 /* 4*/ MatMultAdd_MPISBAIJ, 1747 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1748 MatMultAdd_MPISBAIJ, 1749 0, 1750 0, 1751 0, 1752 /* 10*/ 0, 1753 0, 1754 0, 1755 MatSOR_MPISBAIJ, 1756 MatTranspose_MPISBAIJ, 1757 /* 15*/ MatGetInfo_MPISBAIJ, 1758 MatEqual_MPISBAIJ, 1759 MatGetDiagonal_MPISBAIJ, 1760 MatDiagonalScale_MPISBAIJ, 1761 MatNorm_MPISBAIJ, 1762 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1763 MatAssemblyEnd_MPISBAIJ, 1764 MatSetOption_MPISBAIJ, 1765 MatZeroEntries_MPISBAIJ, 1766 /* 24*/ 0, 1767 0, 1768 0, 1769 0, 1770 0, 1771 /* 29*/ MatSetUp_MPISBAIJ, 1772 0, 1773 0, 1774 0, 1775 0, 1776 /* 34*/ MatDuplicate_MPISBAIJ, 1777 0, 1778 0, 1779 0, 1780 0, 1781 /* 39*/ MatAXPY_MPISBAIJ, 1782 MatGetSubMatrices_MPISBAIJ, 1783 MatIncreaseOverlap_MPISBAIJ, 1784 MatGetValues_MPISBAIJ, 1785 MatCopy_MPISBAIJ, 1786 /* 44*/ 0, 1787 MatScale_MPISBAIJ, 1788 MatShift_MPISBAIJ, 1789 0, 1790 0, 1791 /* 49*/ 0, 1792 0, 1793 0, 1794 0, 1795 0, 1796 /* 54*/ 0, 1797 0, 1798 MatSetUnfactored_MPISBAIJ, 1799 0, 1800 MatSetValuesBlocked_MPISBAIJ, 1801 /* 59*/ MatGetSubMatrix_MPISBAIJ, 1802 0, 1803 0, 1804 0, 1805 0, 1806 /* 64*/ 0, 1807 0, 1808 0, 1809 0, 1810 0, 1811 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1812 0, 1813 0, 1814 0, 1815 
0, 1816 /* 74*/ 0, 1817 0, 1818 0, 1819 0, 1820 0, 1821 /* 79*/ 0, 1822 0, 1823 0, 1824 0, 1825 MatLoad_MPISBAIJ, 1826 /* 84*/ 0, 1827 0, 1828 0, 1829 0, 1830 0, 1831 /* 89*/ 0, 1832 0, 1833 0, 1834 0, 1835 0, 1836 /* 94*/ 0, 1837 0, 1838 0, 1839 0, 1840 0, 1841 /* 99*/ 0, 1842 0, 1843 0, 1844 0, 1845 0, 1846 /*104*/ 0, 1847 MatRealPart_MPISBAIJ, 1848 MatImaginaryPart_MPISBAIJ, 1849 MatGetRowUpperTriangular_MPISBAIJ, 1850 MatRestoreRowUpperTriangular_MPISBAIJ, 1851 /*109*/ 0, 1852 0, 1853 0, 1854 0, 1855 0, 1856 /*114*/ 0, 1857 0, 1858 0, 1859 0, 1860 0, 1861 /*119*/ 0, 1862 0, 1863 0, 1864 0, 1865 0, 1866 /*124*/ 0, 1867 0, 1868 0, 1869 0, 1870 0, 1871 /*129*/ 0, 1872 0, 1873 0, 1874 0, 1875 0, 1876 /*134*/ 0, 1877 0, 1878 0, 1879 0, 1880 0, 1881 /*139*/ 0, 1882 0, 1883 0, 1884 0, 1885 0, 1886 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 1887 }; 1888 1889 #undef __FUNCT__ 1890 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1891 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1892 { 1893 PetscFunctionBegin; 1894 *a = ((Mat_MPISBAIJ*)A->data)->A; 1895 PetscFunctionReturn(0); 1896 } 1897 1898 #undef __FUNCT__ 1899 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1900 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 1901 { 1902 Mat_MPISBAIJ *b; 1903 PetscErrorCode ierr; 1904 PetscInt i,mbs,Mbs; 1905 1906 PetscFunctionBegin; 1907 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 1908 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1909 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1910 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1911 1912 b = (Mat_MPISBAIJ*)B->data; 1913 mbs = B->rmap->n/bs; 1914 Mbs = B->rmap->N/bs; 1915 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1916 1917 B->rmap->bs = bs; 1918 b->bs2 = bs*bs; 1919 b->mbs = mbs; 1920 b->Mbs = Mbs; 1921 b->nbs = B->cmap->n/bs; 1922 b->Nbs = B->cmap->N/bs; 1923 1924 for (i=0; i<=b->size; i++) { 1925 b->rangebs[i] = B->rmap->range[i]/bs; 1926 } 1927 b->rstartbs = B->rmap->rstart/bs; 1928 b->rendbs = B->rmap->rend/bs; 1929 1930 b->cstartbs = B->cmap->rstart/bs; 1931 b->cendbs = B->cmap->rend/bs; 1932 1933 if (!B->preallocated) { 1934 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1935 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1936 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1937 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 1938 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1939 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1940 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1941 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 1942 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 1943 } 1944 1945 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1946 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1947 1948 B->preallocated = PETSC_TRUE; 1949 PetscFunctionReturn(0); 1950 } 1951 1952 #undef __FUNCT__ 1953 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ" 1954 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1955 { 1956 PetscInt m,rstart,cstart,cend; 1957 PetscInt 
i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 1958 const PetscInt *JJ =0; 1959 PetscScalar *values=0; 1960 PetscErrorCode ierr; 1961 1962 PetscFunctionBegin; 1963 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 1964 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 1965 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 1966 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1967 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1968 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1969 m = B->rmap->n/bs; 1970 rstart = B->rmap->rstart/bs; 1971 cstart = B->cmap->rstart/bs; 1972 cend = B->cmap->rend/bs; 1973 1974 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 1975 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 1976 for (i=0; i<m; i++) { 1977 nz = ii[i+1] - ii[i]; 1978 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 1979 nz_max = PetscMax(nz_max,nz); 1980 JJ = jj + ii[i]; 1981 for (j=0; j<nz; j++) { 1982 if (*JJ >= cstart) break; 1983 JJ++; 1984 } 1985 d = 0; 1986 for (; j<nz; j++) { 1987 if (*JJ++ >= cend) break; 1988 d++; 1989 } 1990 d_nnz[i] = d; 1991 o_nnz[i] = nz - d; 1992 } 1993 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1994 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 1995 1996 values = (PetscScalar*)V; 1997 if (!values) { 1998 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 1999 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2000 } 2001 for (i=0; i<m; i++) { 2002 PetscInt row = i + rstart; 2003 PetscInt ncols = ii[i+1] - ii[i]; 2004 const PetscInt *icols = jj + ii[i]; 2005 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2006 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2007 } 2008 2009 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2010 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2011 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2012 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2013 PetscFunctionReturn(0); 2014 } 2015 2016 /*MC 2017 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2018 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2019 the matrix is stored. 2020 2021 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2022 can call MatSetOption(Mat, MAT_HERMITIAN); 2023 2024 Options Database Keys: 2025 . 
-mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 2026 2027 Level: beginner 2028 2029 .seealso: MatCreateMPISBAIJ 2030 M*/ 2031 2032 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*); 2033 2034 #undef __FUNCT__ 2035 #define __FUNCT__ "MatCreate_MPISBAIJ" 2036 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2037 { 2038 Mat_MPISBAIJ *b; 2039 PetscErrorCode ierr; 2040 PetscBool flg = PETSC_FALSE; 2041 2042 PetscFunctionBegin; 2043 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2044 B->data = (void*)b; 2045 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2046 2047 B->ops->destroy = MatDestroy_MPISBAIJ; 2048 B->ops->view = MatView_MPISBAIJ; 2049 B->assembled = PETSC_FALSE; 2050 B->insertmode = NOT_SET_VALUES; 2051 2052 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2053 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2054 2055 /* build local table of row and column ownerships */ 2056 ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 2057 2058 /* build cache for off array entries formed */ 2059 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2060 2061 b->donotstash = PETSC_FALSE; 2062 b->colmap = NULL; 2063 b->garray = NULL; 2064 b->roworiented = PETSC_TRUE; 2065 2066 /* stuff used in block assembly */ 2067 b->barray = 0; 2068 2069 /* stuff used for matrix vector multiply */ 2070 b->lvec = 0; 2071 b->Mvctx = 0; 2072 b->slvec0 = 0; 2073 b->slvec0b = 0; 2074 b->slvec1 = 0; 2075 b->slvec1a = 0; 2076 b->slvec1b = 0; 2077 b->sMvctx = 0; 2078 2079 /* stuff for MatGetRow() */ 2080 b->rowindices = 0; 2081 b->rowvalues = 0; 2082 b->getrowactive = PETSC_FALSE; 2083 2084 /* hash table stuff */ 2085 b->ht = 0; 2086 b->hd = 0; 2087 b->ht_size = 0; 2088 b->ht_flag = PETSC_FALSE; 2089 b->ht_fact = 0; 2090 b->ht_total_ct = 0; 2091 b->ht_insert_ct = 0; 2092 2093 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 2094 b->ijonly = PETSC_FALSE; 2095 2096 b->in_loc = 0; 2097 b->v_loc = 0; 2098 b->n_loc = 0; 2099 2100 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 2101 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 2102 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 2103 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 2104 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 2105 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr); 2106 #if defined(PETSC_HAVE_ELEMENTAL) 2107 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 2108 #endif 2109 2110 B->symmetric = PETSC_TRUE; 2111 B->structurally_symmetric = PETSC_TRUE; 2112 B->symmetric_set = PETSC_TRUE; 2113 B->structurally_symmetric_set = PETSC_TRUE; 2114 2115 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 2116 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 2117 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save 
memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 2118 if (flg) { 2119 PetscReal fact = 1.39; 2120 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2121 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 2122 if (fact <= 1.0) fact = 1.39; 2123 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2124 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2125 } 2126 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2127 PetscFunctionReturn(0); 2128 } 2129 2130 /*MC 2131 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2132 2133 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 2134 and MATMPISBAIJ otherwise. 2135 2136 Options Database Keys: 2137 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2138 2139 Level: beginner 2140 2141 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 2142 M*/ 2143 2144 #undef __FUNCT__ 2145 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 2146 /*@C 2147 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2148 the user should preallocate the matrix storage by setting the parameters 2149 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2150 performance can be increased by more than a factor of 50. 2151 2152 Collective on Mat 2153 2154 Input Parameters: 2155 + B - the matrix 2156 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2157 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2158 . d_nz - number of block nonzeros per block row in diagonal portion of local 2159 submatrix (same for all local rows) 2160 . d_nnz - array containing the number of block nonzeros in the various block rows 2161 in the upper triangular and diagonal part of the in diagonal portion of the local 2162 (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 2163 for the diagonal entry and set a value even if it is zero. 2164 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2165 submatrix (same for all local rows). 2166 - o_nnz - array containing the number of nonzeros in the various block rows of the 2167 off-diagonal portion of the local submatrix that is right of the diagonal 2168 (possibly different for each block row) or NULL. 2169 2170 2171 Options Database Keys: 2172 . -mat_no_unroll - uses code that does not unroll the loops in the 2173 block calculations (much slower) 2174 . -mat_block_size - size of the blocks to use 2175 2176 Notes: 2177 2178 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2179 than it must be used on all processors that share the object for that argument. 2180 2181 If the *_nnz parameter is given then the *_nz parameter is ignored 2182 2183 Storage Information: 2184 For a square global matrix we define each processor's diagonal portion 2185 to be its local rows and the corresponding columns (a square submatrix); 2186 each processor's off-diagonal portion encompasses the remainder of the 2187 local matrix (a rectangular submatrix). 
2188 2189 The user can specify preallocated storage for the diagonal part of 2190 the local submatrix with either d_nz or d_nnz (not both). Set 2191 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2192 memory allocation. Likewise, specify preallocated storage for the 2193 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2194 2195 You can call MatGetInfo() to get information on how effective the preallocation was; 2196 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2197 You can also run with the option -info and look for messages with the string 2198 malloc in them to see if additional memory allocation was needed. 2199 2200 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2201 the figure below we depict these three local rows and all columns (0-11). 2202 2203 .vb 2204 0 1 2 3 4 5 6 7 8 9 10 11 2205 -------------------------- 2206 row 3 |. . . d d d o o o o o o 2207 row 4 |. . . d d d o o o o o o 2208 row 5 |. . . d d d o o o o o o 2209 -------------------------- 2210 .ve 2211 2212 Thus, any entries in the d locations are stored in the d (diagonal) 2213 submatrix, and any entries in the o locations are stored in the 2214 o (off-diagonal) submatrix. Note that the d matrix is stored in 2215 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2216 2217 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2218 plus the diagonal part of the d matrix, 2219 and o_nz should indicate the number of block nonzeros per row in the o matrix 2220 2221 In general, for PDE problems in which most nonzeros are near the diagonal, 2222 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2223 or you will get TERRIBLE performance; see the users' manual chapter on 2224 matrices. 2225 2226 Level: intermediate 2227 2228 .keywords: matrix, block, aij, compressed row, sparse, parallel 2229 2230 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 2231 @*/ 2232 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2233 { 2234 PetscErrorCode ierr; 2235 2236 PetscFunctionBegin; 2237 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2238 PetscValidType(B,1); 2239 PetscValidLogicalCollectiveInt(B,bs,2); 2240 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 2241 PetscFunctionReturn(0); 2242 } 2243 2244 #undef __FUNCT__ 2245 #define __FUNCT__ "MatCreateSBAIJ" 2246 /*@C 2247 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2248 (block compressed row). For good matrix assembly performance 2249 the user should preallocate the matrix storage by setting the parameters 2250 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2251 performance can be increased by more than a factor of 50. 2252 2253 Collective on MPI_Comm 2254 2255 Input Parameters: 2256 + comm - MPI communicator 2257 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2258 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2259 . 
m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2260 This value should be the same as the local size used in creating the 2261 y vector for the matrix-vector product y = Ax. 2262 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2263 This value should be the same as the local size used in creating the 2264 x vector for the matrix-vector product y = Ax. 2265 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2266 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2267 . d_nz - number of block nonzeros per block row in diagonal portion of local 2268 submatrix (same for all local rows) 2269 . d_nnz - array containing the number of block nonzeros in the various block rows 2270 in the upper triangular portion of the diagonal portion of the local submatrix 2271 (possibly different for each block row) or NULL. 2272 If you plan to factor the matrix you must leave room for the diagonal entry and 2273 set its value even if it is zero. 2274 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2275 submatrix (same for all local rows). 2276 - o_nnz - array containing the number of nonzeros in the various block rows of the 2277 off-diagonal portion of the local submatrix (possibly different for 2278 each block row) or NULL. 2279 2280 Output Parameter: 2281 . A - the matrix 2282 2283 Options Database Keys: 2284 . -mat_no_unroll - uses code that does not unroll the loops in the 2285 block calculations (much slower) 2286 . -mat_block_size - size of the blocks to use 2287 . -mat_mpi - use the parallel matrix data structures even on one processor 2288 (defaults to using SeqBAIJ format on one processor) 2289 2290 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2291 MatXXXXSetPreallocation() paradigm instead of this routine directly. 2292 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2293 2294 Notes: 2295 The number of rows and columns must be divisible by blocksize. 2296 This matrix type does not support complex Hermitian operation. 2297 2298 The user MUST specify either the local or global matrix dimensions 2299 (possibly both). 2300 2301 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2302 then it must be used on all processors that share the object for that argument. 2303 2304 If the *_nnz parameter is given then the *_nz parameter is ignored 2305 2306 Storage Information: 2307 For a square global matrix we define each processor's diagonal portion 2308 to be its local rows and the corresponding columns (a square submatrix); 2309 each processor's off-diagonal portion encompasses the remainder of the 2310 local matrix (a rectangular submatrix). 2311 2312 The user can specify preallocated storage for the diagonal part of 2313 the local submatrix with either d_nz or d_nnz (not both). Set 2314 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2315 memory allocation. Likewise, specify preallocated storage for the 2316 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2317 2318 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2319 the figure below we depict these three local rows and all columns (0-11). 2320 2321 .vb 2322 0 1 2 3 4 5 6 7 8 9 10 11 2323 -------------------------- 2324 row 3 |. . . d d d o o o o o o 2325 row 4 |. . . d d d o o o o o o 2326 row 5 |. . . 
d d d o o o o o o 2327 -------------------------- 2328 .ve 2329 2330 Thus, any entries in the d locations are stored in the d (diagonal) 2331 submatrix, and any entries in the o locations are stored in the 2332 o (off-diagonal) submatrix. Note that the d matrix is stored in 2333 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2334 2335 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2336 plus the diagonal part of the d matrix, 2337 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2338 In general, for PDE problems in which most nonzeros are near the diagonal, 2339 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2340 or you will get TERRIBLE performance; see the users' manual chapter on 2341 matrices. 2342 2343 Level: intermediate 2344 2345 .keywords: matrix, block, aij, compressed row, sparse, parallel 2346 2347 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2348 @*/ 2349 2350 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2351 { 2352 PetscErrorCode ierr; 2353 PetscMPIInt size; 2354 2355 PetscFunctionBegin; 2356 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2357 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2358 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2359 if (size > 1) { 2360 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2361 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2362 } else { 2363 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2364 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2365 } 2366 PetscFunctionReturn(0); 2367 } 2368 2369 2370 #undef __FUNCT__ 2371 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2372 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2373 { 2374 Mat mat; 2375 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2376 PetscErrorCode ierr; 2377 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2378 PetscScalar *array; 2379 2380 PetscFunctionBegin; 2381 *newmat = 0; 2382 2383 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2384 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2385 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2386 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2387 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2388 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2389 2390 mat->factortype = matin->factortype; 2391 mat->preallocated = PETSC_TRUE; 2392 mat->assembled = PETSC_TRUE; 2393 mat->insertmode = NOT_SET_VALUES; 2394 2395 a = (Mat_MPISBAIJ*)mat->data; 2396 a->bs2 = oldmat->bs2; 2397 a->mbs = oldmat->mbs; 2398 a->nbs = oldmat->nbs; 2399 a->Mbs = oldmat->Mbs; 2400 a->Nbs = oldmat->Nbs; 2401 2402 2403 a->size = oldmat->size; 2404 a->rank = oldmat->rank; 2405 a->donotstash = oldmat->donotstash; 2406 a->roworiented = oldmat->roworiented; 2407 a->rowindices = 0; 2408 a->rowvalues = 0; 2409 a->getrowactive = PETSC_FALSE; 2410 a->barray = 0; 2411 a->rstartbs = oldmat->rstartbs; 2412 a->rendbs = oldmat->rendbs; 2413 a->cstartbs = oldmat->cstartbs; 2414 a->cendbs = oldmat->cendbs; 2415 2416 /* hash table stuff */ 2417 a->ht = 0; 2418 a->hd = 0; 2419 a->ht_size = 0; 2420 a->ht_flag = oldmat->ht_flag; 2421 a->ht_fact 
= oldmat->ht_fact; 2422 a->ht_total_ct = 0; 2423 a->ht_insert_ct = 0; 2424 2425 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2426 if (oldmat->colmap) { 2427 #if defined(PETSC_USE_CTABLE) 2428 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2429 #else 2430 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 2431 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2432 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2433 #endif 2434 } else a->colmap = 0; 2435 2436 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2437 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2438 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2439 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2440 } else a->garray = 0; 2441 2442 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2443 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2444 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2445 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2446 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2447 2448 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2449 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2450 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2451 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2452 2453 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2454 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2455 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2456 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2457 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2458 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2459 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2460 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2461 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2462 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2463 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 2464 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 2465 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2466 2467 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2468 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2469 a->sMvctx = oldmat->sMvctx; 2470 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 2471 2472 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2473 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2474 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2475 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2476 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2477 *newmat = mat; 2478 PetscFunctionReturn(0); 2479 } 2480 2481 #undef __FUNCT__ 2482 #define 
__FUNCT__ "MatLoad_MPISBAIJ" 2483 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2484 { 2485 PetscErrorCode ierr; 2486 PetscInt i,nz,j,rstart,rend; 2487 PetscScalar *vals,*buf; 2488 MPI_Comm comm; 2489 MPI_Status status; 2490 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2491 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2492 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2493 PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows; 2494 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2495 PetscInt dcount,kmax,k,nzcount,tmp; 2496 int fd; 2497 2498 PetscFunctionBegin; 2499 /* force binary viewer to load .info file if it has not yet done so */ 2500 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2501 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2502 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2503 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2504 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2505 if (bs < 0) bs = 1; 2506 2507 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2508 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2509 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2510 if (!rank) { 2511 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2512 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2513 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2514 } 2515 2516 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2517 M = header[1]; 2518 N = header[2]; 2519 2520 /* If global sizes are set, check if they are consistent with that given in the file */ 2521 if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M); 2522 if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N); 2523 2524 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2525 2526 /* 2527 This code adds extra rows to make sure the number of rows is 2528 divisible by the blocksize 2529 */ 2530 Mbs = M/bs; 2531 extra_rows = bs - M + bs*(Mbs); 2532 if (extra_rows == bs) extra_rows = 0; 2533 else Mbs++; 2534 if (extra_rows &&!rank) { 2535 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2536 } 2537 2538 /* determine ownership of all rows */ 2539 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2540 mbs = Mbs/size + ((Mbs % size) > rank); 2541 m = mbs*bs; 2542 } else { /* User Set */ 2543 m = newmat->rmap->n; 2544 mbs = m/bs; 2545 } 2546 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 2547 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2548 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2549 rowners[0] = 0; 2550 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2551 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2552 rstart = rowners[rank]; 2553 rend = rowners[rank+1]; 2554 2555 /* distribute row lengths to all processors */ 2556 ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 2557 if (!rank) { 
2558 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 2559 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2560 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2561 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 2562 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2563 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2564 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2565 } else { 2566 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2567 } 2568 2569 if (!rank) { /* procs[0] */ 2570 /* calculate the number of nonzeros on each processor */ 2571 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 2572 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2573 for (i=0; i<size; i++) { 2574 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2575 procsnz[i] += rowlengths[j]; 2576 } 2577 } 2578 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2579 2580 /* determine max buffer needed and allocate it */ 2581 maxnz = 0; 2582 for (i=0; i<size; i++) { 2583 maxnz = PetscMax(maxnz,procsnz[i]); 2584 } 2585 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2586 2587 /* read in my part of the matrix column indices */ 2588 nz = procsnz[0]; 2589 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2590 mycols = ibuf; 2591 if (size == 1) nz -= extra_rows; 2592 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2593 if (size == 1) { 2594 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 2595 } 2596 2597 /* read in every ones (except the last) and ship off */ 2598 for (i=1; i<size-1; i++) { 2599 nz = procsnz[i]; 2600 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2601 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2602 } 2603 /* read in the stuff for the last proc */ 2604 if (size != 1) { 2605 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2606 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2607 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2608 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2609 } 2610 ierr = PetscFree(cols);CHKERRQ(ierr); 2611 } else { /* procs[i], i>0 */ 2612 /* determine buffer space needed for message */ 2613 nz = 0; 2614 for (i=0; i<m; i++) nz += locrowlens[i]; 2615 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2616 mycols = ibuf; 2617 /* receive message of column indices*/ 2618 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2619 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2620 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2621 } 2622 2623 /* loop over local rows, determining number of off diagonal entries */ 2624 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 2625 ierr = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 2626 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2627 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2628 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2629 rowcount = 0; 2630 nzcount = 0; 2631 for (i=0; i<mbs; i++) { 2632 dcount = 0; 2633 odcount = 0; 2634 for (j=0; j<bs; j++) { 2635 kmax = locrowlens[rowcount]; 2636 for (k=0; k<kmax; k++) { 2637 tmp = mycols[nzcount++]/bs; /* block col. 
index */ 2638 if (!mask[tmp]) { 2639 mask[tmp] = 1; 2640 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2641 else masked1[dcount++] = tmp; /* entry in diag portion */ 2642 } 2643 } 2644 rowcount++; 2645 } 2646 2647 dlens[i] = dcount; /* d_nzz[i] */ 2648 odlens[i] = odcount; /* o_nzz[i] */ 2649 2650 /* zero out the mask elements we set */ 2651 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2652 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2653 } 2654 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2655 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2656 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2657 2658 if (!rank) { 2659 ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 2660 /* read in my part of the matrix numerical values */ 2661 nz = procsnz[0]; 2662 vals = buf; 2663 mycols = ibuf; 2664 if (size == 1) nz -= extra_rows; 2665 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2666 if (size == 1) { 2667 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 2668 } 2669 2670 /* insert into matrix */ 2671 jj = rstart*bs; 2672 for (i=0; i<m; i++) { 2673 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2674 mycols += locrowlens[i]; 2675 vals += locrowlens[i]; 2676 jj++; 2677 } 2678 2679 /* read in other processors (except the last one) and ship out */ 2680 for (i=1; i<size-1; i++) { 2681 nz = procsnz[i]; 2682 vals = buf; 2683 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2684 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2685 } 2686 /* the last proc */ 2687 if (size != 1) { 2688 nz = procsnz[i] - extra_rows; 2689 vals = buf; 2690 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2691 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2692 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2693 } 2694 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2695 2696 } else { 2697 /* receive numeric values */ 2698 ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 2699 2700 /* receive message of values*/ 2701 vals = buf; 2702 mycols = ibuf; 2703 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2704 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2705 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2706 2707 /* insert into matrix */ 2708 jj = rstart*bs; 2709 for (i=0; i<m; i++) { 2710 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2711 mycols += locrowlens[i]; 2712 vals += locrowlens[i]; 2713 jj++; 2714 } 2715 } 2716 2717 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2718 ierr = PetscFree(buf);CHKERRQ(ierr); 2719 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2720 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2721 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2722 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2723 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2724 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2725 PetscFunctionReturn(0); 2726 } 2727 2728 #undef __FUNCT__ 2729 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2730 /*XXXXX@ 2731 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2732 2733 Input Parameters: 2734 . mat - the matrix 2735 . 
fact - factor 2736 2737 Not Collective on Mat, each process can have a different hash factor 2738 2739 Level: advanced 2740 2741 Notes: 2742 This can also be set by the command line option: -mat_use_hash_table fact 2743 2744 .keywords: matrix, hashtable, factor, HT 2745 2746 .seealso: MatSetOption() 2747 @XXXXX*/ 2748 2749 2750 #undef __FUNCT__ 2751 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2752 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2753 { 2754 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2755 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2756 PetscReal atmp; 2757 PetscReal *work,*svalues,*rvalues; 2758 PetscErrorCode ierr; 2759 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2760 PetscMPIInt rank,size; 2761 PetscInt *rowners_bs,dest,count,source; 2762 PetscScalar *va; 2763 MatScalar *ba; 2764 MPI_Status stat; 2765 2766 PetscFunctionBegin; 2767 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2768 ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 2769 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2770 2771 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2772 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2773 2774 bs = A->rmap->bs; 2775 mbs = a->mbs; 2776 Mbs = a->Mbs; 2777 ba = b->a; 2778 bi = b->i; 2779 bj = b->j; 2780 2781 /* find ownerships */ 2782 rowners_bs = A->rmap->range; 2783 2784 /* each proc creates an array to be distributed */ 2785 ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr); 2786 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2787 2788 /* row_max for B */ 2789 if (rank != size-1) { 2790 for (i=0; i<mbs; i++) { 2791 ncols = bi[1] - bi[0]; bi++; 2792 brow = bs*i; 2793 for (j=0; j<ncols; j++) { 2794 bcol = bs*(*bj); 2795 for (kcol=0; kcol<bs; kcol++) { 2796 col = bcol + kcol; /* local col index */ 2797 col += rowners_bs[rank+1]; /* global col index */ 2798 for (krow=0; krow<bs; krow++) { 2799 atmp = PetscAbsScalar(*ba); ba++; 2800 row = brow + krow; /* local row index */ 2801 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2802 if (work[col] < atmp) work[col] = atmp; 2803 } 2804 } 2805 bj++; 2806 } 2807 } 2808 2809 /* send values to its owners */ 2810 for (dest=rank+1; dest<size; dest++) { 2811 svalues = work + rowners_bs[dest]; 2812 count = rowners_bs[dest+1]-rowners_bs[dest]; 2813 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2814 } 2815 } 2816 2817 /* receive values */ 2818 if (rank) { 2819 rvalues = work; 2820 count = rowners_bs[rank+1]-rowners_bs[rank]; 2821 for (source=0; source<rank; source++) { 2822 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 2823 /* process values */ 2824 for (i=0; i<count; i++) { 2825 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2826 } 2827 } 2828 } 2829 2830 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2831 ierr = PetscFree(work);CHKERRQ(ierr); 2832 PetscFunctionReturn(0); 2833 } 2834 2835 #undef __FUNCT__ 2836 #define __FUNCT__ "MatSOR_MPISBAIJ" 2837 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2838 { 2839 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2840 PetscErrorCode ierr; 2841 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2842 PetscScalar *x,*ptr,*from; 2843 Vec bb1; 2844 const PetscScalar *b; 2845 2846 PetscFunctionBegin; 2847 if (its <= 0 || lits <= 0) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2848 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2849 2850 if (flag == SOR_APPLY_UPPER) { 2851 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2852 PetscFunctionReturn(0); 2853 } 2854 2855 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2856 if (flag & SOR_ZERO_INITIAL_GUESS) { 2857 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2858 its--; 2859 } 2860 2861 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2862 while (its--) { 2863 2864 /* lower triangular part: slvec0b = - B^T*xx */ 2865 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2866 2867 /* copy xx into slvec0a */ 2868 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2869 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2870 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2871 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2872 2873 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2874 2875 /* copy bb into slvec1a */ 2876 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2877 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2878 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2879 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2880 2881 /* set slvec1b = 0 */ 2882 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2883 2884 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2885 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2886 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2887 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2888 2889 /* upper triangular part: bb1 = bb1 - B*x */ 2890 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2891 2892 /* local diagonal sweep */ 2893 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2894 } 2895 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2896 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2897 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2898 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2899 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2900 } else if (flag & SOR_EISENSTAT) { 2901 Vec xx1; 2902 PetscBool hasop; 2903 const PetscScalar *diag; 2904 PetscScalar *sl,scale = (omega - 2.0)/omega; 2905 PetscInt i,n; 2906 2907 if (!mat->xx1) { 2908 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2909 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2910 } 2911 xx1 = mat->xx1; 2912 bb1 = mat->bb1; 2913 2914 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2915 2916 if (!mat->diag) { 2917 /* this is wrong for same matrix with new nonzero values */ 2918 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 2919 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2920 } 2921 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2922 2923 if (hasop) { 2924 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2925 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2926 } else { 2927 /* 2928 These 
two lines are replaced by code that may be a bit faster for a good compiler 2929 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2930 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2931 */ 2932 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2933 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2934 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2935 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2936 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2937 if (omega == 1.0) { 2938 for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 2939 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2940 } else { 2941 for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 2942 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2943 } 2944 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2945 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2946 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2947 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2948 } 2949 2950 /* multiply off-diagonal portion of matrix */ 2951 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2952 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2953 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2954 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2955 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2956 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2957 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2958 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2959 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2960 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2961 2962 /* local sweep */ 2963 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2964 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2965 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2966 PetscFunctionReturn(0); 2967 } 2968 2969 #undef __FUNCT__ 2970 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2971 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2972 { 2973 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2974 PetscErrorCode ierr; 2975 Vec lvec1,bb1; 2976 2977 PetscFunctionBegin; 2978 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2979 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2980 2981 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2982 if (flag & SOR_ZERO_INITIAL_GUESS) { 2983 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2984 its--; 2985 } 2986 2987 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2988 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2989 while (its--) { 2990 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2991 2992 /* lower diagonal part: bb1 = bb - B^T*xx */ 2993 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2994 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2995 2996 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2997 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2998 ierr = 
VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2999 3000 /* upper diagonal part: bb1 = bb1 - B*x */ 3001 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 3002 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 3003 3004 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3005 3006 /* diagonal sweep */ 3007 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 3008 } 3009 ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 3010 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 3011 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 3012 PetscFunctionReturn(0); 3013 } 3014 3015 #undef __FUNCT__ 3016 #define __FUNCT__ "MatCreateMPISBAIJWithArrays" 3017 /*@ 3018 MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows in 3019 standard CSR format. 3020 3021 Collective on MPI_Comm 3022 3023 Input Parameters: 3024 + comm - MPI communicator 3025 . bs - the block size, only a block size of 1 is supported 3026 . m - number of local rows (Cannot be PETSC_DECIDE) 3027 . n - This value should be the same as the local size used in creating the 3028 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3029 calculated if N is given) For square matrices n is almost always m. 3030 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3031 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3032 . i - row indices 3033 . j - column indices 3034 - a - matrix values 3035 3036 Output Parameter: 3037 . mat - the matrix 3038 3039 Level: intermediate 3040 3041 Notes: 3042 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3043 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3044 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3045 3046 The i and j indices are 0 based, and the entries of i are offsets into the local j array. 3047 3048 .keywords: matrix, aij, compressed row, sparse, parallel 3049 3050 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3051 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3052 @*/ 3053 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3054 { 3055 PetscErrorCode ierr; 3056 3057 3058 PetscFunctionBegin; 3059 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3060 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3061 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3062 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3063 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 3064 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 3065 PetscFunctionReturn(0); 3066 } 3067 3068 3069 #undef __FUNCT__ 3070 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR" 3071 /*@C 3072 MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format 3073 (symmetric block compressed row). 3074 3075 Collective on MPI_Comm 3076 3077 Input Parameters: 3078 + B - the matrix 3079 . bs - the block size 3080 . 
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
/*@C
   MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ
   (symmetric block compressed row) format.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, sbaij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
@*/
PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data;
    PetscInt     *dnz,*onz,sum,bs,cbs,mbs,Nbs;
    PetscInt     *bindx,rmax=a->rmax,j;

    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    mbs  = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
    }
    /* Check sum(n) = Nbs */
    ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns != global block columns %D",Nbs);

    ierr    = MPI_Scan(&mbs,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= mbs;

    ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
    ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<mbs; i++) {
      ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
      nnz  = nnz/bs;
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
    }
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscFree(bindx);CHKERRQ(ierr);

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);

  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
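
/*
   Illustrative sketch of how the routine above is typically reached: each process holds a
   sequential SBAIJ matrix with only its own rows, and the generic entry point
   MatCreateMPIMatConcatenateSeqMat() (assumed here to dispatch to
   MatCreateMPIMatConcatenateSeqMat_MPISBAIJ() for SBAIJ input) stacks them into one parallel
   matrix.  The helper name and the PETSC_DECIDE column split are choices made for this sketch,
   which is kept inside #if 0 and not compiled.
*/
#if 0
static PetscErrorCode ExampleConcatenateSeqSBAIJ(MPI_Comm comm,Mat seqA,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* seqA is a MATSEQSBAIJ matrix owned by this process; PETSC_DECIDE lets PETSc choose how the
     (block) columns are split across the communicator (the symbolic phase above) */
  ierr = MatCreateMPIMatConcatenateSeqMat(comm,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,A);CHKERRQ(ierr);

  /* if the nonzero pattern of seqA is unchanged, later calls can refill the existing parallel
     matrix, exercising only the numeric phase above */
  ierr = MatCreateMPIMatConcatenateSeqMat(comm,seqA,PETSC_DECIDE,MAT_REUSE_MATRIX,A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif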