1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 #if defined(PETSC_HAVE_ELEMENTAL) 8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 9 #endif 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 12 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 13 { 14 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 15 PetscErrorCode ierr; 16 17 PetscFunctionBegin; 18 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 19 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 20 PetscFunctionReturn(0); 21 } 22 23 #undef __FUNCT__ 24 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 25 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 26 { 27 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 28 PetscErrorCode ierr; 29 30 PetscFunctionBegin; 31 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 32 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 33 PetscFunctionReturn(0); 34 } 35 36 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 37 { \ 38 \ 39 brow = row/bs; \ 40 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 41 rmax = aimax[brow]; nrow = ailen[brow]; \ 42 bcol = col/bs; \ 43 ridx = row % bs; cidx = col % bs; \ 44 low = 0; high = nrow; \ 45 while (high-low > 3) { \ 46 t = (low+high)/2; \ 47 if (rp[t] > bcol) high = t; \ 48 else low = t; \ 49 } \ 50 for (_i=low; _i<high; _i++) { \ 51 if (rp[_i] > bcol) break; \ 52 if (rp[_i] == bcol) { \ 53 bap = ap + bs2*_i + bs*cidx + ridx; \ 54 if (addv == ADD_VALUES) *bap += value; \ 55 else *bap = value; \ 56 goto a_noinsert; \ 57 } \ 58 } \ 59 if (a->nonew == 1) goto a_noinsert; \ 60 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 61 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 62 N = nrow++ - 1; \ 63 /* shift up all the later entries in this row */ \ 64 for (ii=N; ii>=_i; ii--) { \ 65 rp[ii+1] = rp[ii]; \ 66 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 67 } \ 68 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 69 rp[_i] = bcol; \ 70 ap[bs2*_i + bs*cidx + ridx] = value; \ 71 A->nonzerostate++;\ 72 a_noinsert:; \ 73 ailen[brow] = nrow; \ 74 } 75 76 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 77 { \ 78 brow = row/bs; \ 79 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 80 rmax = bimax[brow]; nrow = bilen[brow]; \ 81 bcol = col/bs; \ 82 ridx = row % bs; cidx = col % bs; \ 83 low = 0; high = nrow; \ 84 while (high-low > 3) { \ 85 t = (low+high)/2; \ 86 if (rp[t] > bcol) high = t; \ 87 else low = t; \ 88 } \ 89 for (_i=low; _i<high; _i++) { \ 90 if (rp[_i] > bcol) break; \ 91 if (rp[_i] == bcol) { \ 92 bap = ap + bs2*_i + bs*cidx + ridx; \ 93 if (addv == ADD_VALUES) *bap += value; \ 94 else *bap = value; \ 95 goto b_noinsert; \ 96 } \ 97 } \ 98 if (b->nonew == 1) goto b_noinsert; \ 99 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 100 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 101 N = nrow++ - 1; \ 102 /* shift up all the later entries in this row */ \ 103 for (ii=N; ii>=_i; ii--) { \ 104 rp[ii+1] = rp[ii]; \ 105 ierr = 
PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    B->nonzerostate++; \
    b_noinsert:; \
    bilen[brow] = nrow; \
  }
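/*
   A minimal usage sketch (hypothetical application code, not part of this file)
   showing how the insertion macros above are driven through the public API.
   Only the upper triangle is stored for SBAIJ, so either keep i <= j or ask
   the matrix to drop lower-triangular input:

     Mat         S;            // assumed already created as MATMPISBAIJ with block size bs
     PetscInt    i = 0, j = 3; // global indices with i <= j
     PetscScalar v = 1.0;
     ierr = MatSetOption(S,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetValues(S,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(S,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(S,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/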
/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else { /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ*)(B)->data;
              bimax = b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba    = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else { /* off processor entry */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ_Inlined"
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt          *aj  =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 255 } 256 rp = aj + ai[row]; 257 ap = aa + bs2*ai[row]; 258 rmax = imax[row]; 259 nrow = ailen[row]; 260 value = v; 261 low = 0; 262 high = nrow; 263 264 while (high-low > 7) { 265 t = (low+high)/2; 266 if (rp[t] > col) high = t; 267 else low = t; 268 } 269 for (i=low; i<high; i++) { 270 if (rp[i] > col) break; 271 if (rp[i] == col) { 272 bap = ap + bs2*i; 273 if (roworiented) { 274 if (is == ADD_VALUES) { 275 for (ii=0; ii<bs; ii++) { 276 for (jj=ii; jj<bs2; jj+=bs) { 277 bap[jj] += *value++; 278 } 279 } 280 } else { 281 for (ii=0; ii<bs; ii++) { 282 for (jj=ii; jj<bs2; jj+=bs) { 283 bap[jj] = *value++; 284 } 285 } 286 } 287 } else { 288 if (is == ADD_VALUES) { 289 for (ii=0; ii<bs; ii++) { 290 for (jj=0; jj<bs; jj++) { 291 *bap++ += *value++; 292 } 293 } 294 } else { 295 for (ii=0; ii<bs; ii++) { 296 for (jj=0; jj<bs; jj++) { 297 *bap++ = *value++; 298 } 299 } 300 } 301 } 302 goto noinsert2; 303 } 304 } 305 if (nonew == 1) goto noinsert2; 306 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol); 307 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 308 N = nrow++ - 1; high++; 309 /* shift up all the later entries in this row */ 310 for (ii=N; ii>=i; ii--) { 311 rp[ii+1] = rp[ii]; 312 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 313 } 314 if (N >= i) { 315 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 316 } 317 rp[i] = col; 318 bap = ap + bs2*i; 319 if (roworiented) { 320 for (ii=0; ii<bs; ii++) { 321 for (jj=ii; jj<bs2; jj+=bs) { 322 bap[jj] = *value++; 323 } 324 } 325 } else { 326 for (ii=0; ii<bs; ii++) { 327 for (jj=0; jj<bs; jj++) { 328 *bap++ = *value++; 329 } 330 } 331 } 332 noinsert2:; 333 ailen[row] = nrow; 334 PetscFunctionReturn(0); 335 } 336 337 #undef __FUNCT__ 338 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined" 339 /* 340 This routine is exactly duplicated in mpibaij.c 341 */ 342 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 343 { 344 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 345 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 346 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 347 PetscErrorCode ierr; 348 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 349 PetscBool roworiented=a->roworiented; 350 const PetscScalar *value = v; 351 MatScalar *ap,*aa = a->a,*bap; 352 353 PetscFunctionBegin; 354 rp = aj + ai[row]; 355 ap = aa + bs2*ai[row]; 356 rmax = imax[row]; 357 nrow = ailen[row]; 358 low = 0; 359 high = nrow; 360 value = v; 361 while (high-low > 7) { 362 t = (low+high)/2; 363 if (rp[t] > col) high = t; 364 else low = t; 365 } 366 for (i=low; i<high; i++) { 367 if (rp[i] > col) break; 368 if (rp[i] == col) { 369 bap = ap + bs2*i; 370 if (roworiented) { 371 if (is == ADD_VALUES) { 372 for (ii=0; ii<bs; ii++) { 373 for (jj=ii; jj<bs2; jj+=bs) { 374 bap[jj] += *value++; 375 } 376 } 377 } else { 378 for (ii=0; ii<bs; ii++) { 379 for (jj=ii; jj<bs2; jj+=bs) { 380 bap[jj] = *value++; 381 } 382 } 383 } 384 } else { 385 if (is == ADD_VALUES) { 386 for (ii=0; ii<bs; ii++,value+=bs) { 387 for (jj=0; jj<bs; jj++) { 388 bap[jj] += value[jj]; 389 } 390 bap += bs; 391 } 392 } else { 393 for (ii=0; ii<bs; ii++,value+=bs) { 394 for 
(jj=0; jj<bs; jj++) { 395 bap[jj] = value[jj]; 396 } 397 bap += bs; 398 } 399 } 400 } 401 goto noinsert2; 402 } 403 } 404 if (nonew == 1) goto noinsert2; 405 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol); 406 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 407 N = nrow++ - 1; high++; 408 /* shift up all the later entries in this row */ 409 for (ii=N; ii>=i; ii--) { 410 rp[ii+1] = rp[ii]; 411 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 412 } 413 if (N >= i) { 414 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 415 } 416 rp[i] = col; 417 bap = ap + bs2*i; 418 if (roworiented) { 419 for (ii=0; ii<bs; ii++) { 420 for (jj=ii; jj<bs2; jj+=bs) { 421 bap[jj] = *value++; 422 } 423 } 424 } else { 425 for (ii=0; ii<bs; ii++) { 426 for (jj=0; jj<bs; jj++) { 427 *bap++ = *value++; 428 } 429 } 430 } 431 noinsert2:; 432 ailen[row] = nrow; 433 PetscFunctionReturn(0); 434 } 435 436 #undef __FUNCT__ 437 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 438 /* 439 This routine could be optimized by removing the need for the block copy below and passing stride information 440 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 441 */ 442 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 443 { 444 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 445 const MatScalar *value; 446 MatScalar *barray =baij->barray; 447 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 448 PetscErrorCode ierr; 449 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 450 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 451 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 452 453 PetscFunctionBegin; 454 if (!barray) { 455 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 456 baij->barray = barray; 457 } 458 459 if (roworiented) { 460 stepval = (n-1)*bs; 461 } else { 462 stepval = (m-1)*bs; 463 } 464 for (i=0; i<m; i++) { 465 if (im[i] < 0) continue; 466 #if defined(PETSC_USE_DEBUG) 467 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1); 468 #endif 469 if (im[i] >= rstart && im[i] < rend) { 470 row = im[i] - rstart; 471 for (j=0; j<n; j++) { 472 if (im[i] > in[j]) { 473 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 474 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 475 } 476 /* If NumCol = 1 then a copy is not required */ 477 if ((roworiented) && (n == 1)) { 478 barray = (MatScalar*) v + i*bs2; 479 } else if ((!roworiented) && (m == 1)) { 480 barray = (MatScalar*) v + j*bs2; 481 } else { /* Here a copy is required */ 482 if (roworiented) { 483 value = v + i*(stepval+bs)*bs + j*bs; 484 } else { 485 value = v + j*(stepval+bs)*bs + i*bs; 486 } 487 for (ii=0; ii<bs; ii++,value+=stepval) { 488 for (jj=0; jj<bs; jj++) { 489 *barray++ = *value++; 490 } 491 } 492 barray -=bs2; 493 } 494 495 if (in[j] >= cstart && in[j] < cend) { 496 col = in[j] - cstart; 497 ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 498 } else if (in[j] < 0) continue; 499 #if defined(PETSC_USE_DEBUG) 500 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1); 501 #endif 502 else { 503 if (mat->was_assembled) { 504 if (!baij->colmap) { 505 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 506 } 507 508 #if defined(PETSC_USE_DEBUG) 509 #if defined(PETSC_USE_CTABLE) 510 { PetscInt data; 511 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 512 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 513 } 514 #else 515 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 516 #endif 517 #endif 518 #if defined(PETSC_USE_CTABLE) 519 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 520 col = (col - 1)/bs; 521 #else 522 col = (baij->colmap[in[j]] - 1)/bs; 523 #endif 524 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 525 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 526 col = in[j]; 527 } 528 } else col = in[j]; 529 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 530 } 531 } 532 } else { 533 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 534 if (!baij->donotstash) { 535 if (roworiented) { 536 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 537 } else { 538 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 539 } 540 } 541 } 542 } 543 PetscFunctionReturn(0); 544 } 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatGetValues_MPISBAIJ" 548 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 549 { 550 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 551 PetscErrorCode ierr; 552 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 553 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 554 555 PetscFunctionBegin; 556 for (i=0; i<m; i++) { 557 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 558 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 559 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 560 row = idxm[i] - bsrstart; 561 for (j=0; j<n; j++) { 562 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 563 if 
(idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
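/*
   Usage sketch (hypothetical application code): as the error above indicates,
   only locally owned entries can be retrieved, so query rows within this
   process's ownership range:

     PetscInt    rstart,rend,row,cols[1];
     PetscScalar vals[1];
     ierr    = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     row     = rstart;
     cols[0] = rstart;
     ierr    = MatGetValues(mat,1,&row,1,cols,vals);CHKERRQ(ierr);
*/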
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
      ierr    = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr    = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;            /* square of norm(B) */
      ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
      ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
      ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
        ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
        ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
      }
      ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
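/*
   The two stashes support the usual assembly idiom (a minimal, hypothetical
   sketch; grow, gcol and v stand in for application data). Entries set for
   rows owned by another process are stashed here and shipped during assembly:

     ierr = MatSetValues(mat,1,&grow,1,&gcol,&v,ADD_VALUES);CHKERRQ(ierr); // possibly off-process
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);        // scatters stashed entries
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);          // receives and inserts them
*/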
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;
  MatScalar      *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flags to PETSC_FALSE, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqSBAIJ(Mat,PetscViewer);
extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr =
PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 808 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 809 MatInfo info; 810 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 811 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 812 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 813 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 814 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 815 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 816 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 817 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 818 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 819 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 820 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 821 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 822 PetscFunctionReturn(0); 823 } else if (format == PETSC_VIEWER_ASCII_INFO) { 824 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 825 PetscFunctionReturn(0); 826 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 827 PetscFunctionReturn(0); 828 } 829 } 830 831 if (isdraw) { 832 PetscDraw draw; 833 PetscBool isnull; 834 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 835 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 836 if (isnull) PetscFunctionReturn(0); 837 } 838 839 { 840 /* assemble the entire matrix onto first processor. */ 841 Mat A; 842 Mat_SeqSBAIJ *Aloc; 843 Mat_SeqBAIJ *Bloc; 844 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 845 MatScalar *a; 846 const char *matname; 847 848 /* Should this be the same type as mat? 
*/ 849 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 850 if (!rank) { 851 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 852 } else { 853 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 854 } 855 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 856 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 857 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 858 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 859 860 /* copy over the A part */ 861 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 862 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 863 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 864 865 for (i=0; i<mbs; i++) { 866 rvals[0] = bs*(baij->rstartbs + i); 867 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 868 for (j=ai[i]; j<ai[i+1]; j++) { 869 col = (baij->cstartbs+aj[j])*bs; 870 for (k=0; k<bs; k++) { 871 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 872 col++; 873 a += bs; 874 } 875 } 876 } 877 /* copy over the B part */ 878 Bloc = (Mat_SeqBAIJ*)baij->B->data; 879 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 880 for (i=0; i<mbs; i++) { 881 882 rvals[0] = bs*(baij->rstartbs + i); 883 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 884 for (j=ai[i]; j<ai[i+1]; j++) { 885 col = baij->garray[aj[j]]*bs; 886 for (k=0; k<bs; k++) { 887 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 888 col++; 889 a += bs; 890 } 891 } 892 } 893 ierr = PetscFree(rvals);CHKERRQ(ierr); 894 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 895 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 896 /* 897 Everyone has to call to draw the matrix since the graphics waits are 898 synchronized across all processors that share the PetscDraw object 899 */ 900 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 901 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 902 if (!rank) { 903 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 904 ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 905 } 906 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 907 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 908 ierr = MatDestroy(&A);CHKERRQ(ierr); 909 } 910 PetscFunctionReturn(0); 911 } 912 913 #undef __FUNCT__ 914 #define __FUNCT__ "MatView_MPISBAIJ_Binary" 915 static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer) 916 { 917 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)mat->data; 918 Mat_SeqSBAIJ *A = (Mat_SeqSBAIJ*)a->A->data; 919 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data; 920 PetscErrorCode ierr; 921 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 922 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 923 int fd; 924 PetscScalar *column_values; 925 FILE *file; 926 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 927 PetscInt message_count,flowcontrolcount; 928 929 PetscFunctionBegin; 930 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 931 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 932 nz = bs2*(A->nz + B->nz); 933 rlen = mat->rmap->n; 934 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 935 if (!rank) { 936 header[0] = MAT_FILE_CLASSID; 937 header[1] = mat->rmap->N; 938 header[2] = mat->cmap->N; 939 940 ierr = 
MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 941 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 942 /* get largest number of rows any processor has */ 943 range = mat->rmap->range; 944 for (i=1; i<size; i++) { 945 rlen = PetscMax(rlen,range[i+1] - range[i]); 946 } 947 } else { 948 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 949 } 950 951 ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr); 952 /* compute lengths of each row */ 953 for (i=0; i<a->mbs; i++) { 954 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 955 } 956 /* store the row lengths to the file */ 957 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 958 if (!rank) { 959 MPI_Status status; 960 ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr); 961 rlen = (range[1] - range[0])/bs; 962 for (i=0; i<rlen; i++) { 963 for (j=0; j<bs; j++) { 964 row_lens[i*bs+j] = bs*crow_lens[i]; 965 } 966 } 967 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 968 for (i=1; i<size; i++) { 969 rlen = (range[i+1] - range[i])/bs; 970 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 971 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 972 for (k=0; k<rlen; k++) { 973 for (j=0; j<bs; j++) { 974 row_lens[k*bs+j] = bs*crow_lens[k]; 975 } 976 } 977 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 978 } 979 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 980 ierr = PetscFree(row_lens);CHKERRQ(ierr); 981 } else { 982 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 983 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 984 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 985 } 986 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 987 988 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 989 information needed to make it for each row from a block row. 
This does require more communication but still not more than
     the communication needed for the nonzero values */
  nzmax = nz; /* space the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr =
PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1080 for (i=1; i<size; i++) { 1081 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1082 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1083 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1084 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1085 } 1086 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1087 } else { 1088 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1089 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1090 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1091 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1092 } 1093 ierr = PetscFree(column_values);CHKERRQ(ierr); 1094 1095 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1096 if (file) { 1097 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1098 } 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatView_MPISBAIJ" 1104 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 1105 { 1106 PetscErrorCode ierr; 1107 PetscBool iascii,isdraw,issocket,isbinary; 1108 1109 PetscFunctionBegin; 1110 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1111 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1112 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1113 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1114 if (iascii || isdraw || issocket) { 1115 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1116 } else if (isbinary) { 1117 ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1118 } 1119 PetscFunctionReturn(0); 1120 } 1121 1122 #undef __FUNCT__ 1123 #define __FUNCT__ "MatDestroy_MPISBAIJ" 1124 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 1125 { 1126 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1127 PetscErrorCode ierr; 1128 1129 PetscFunctionBegin; 1130 #if defined(PETSC_USE_LOG) 1131 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1132 #endif 1133 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1134 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1135 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1136 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1137 #if defined(PETSC_USE_CTABLE) 1138 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1139 #else 1140 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1141 #endif 1142 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1143 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1144 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1145 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 1146 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 1147 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 1148 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 1149 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 1150 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 1151 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1152 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1153 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 
  ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
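/*
   Note on the structure shared by the multiply routines above and below (added
   commentary, not a statement of new behavior): with only the upper triangle
   stored, y = A*x is assembled as

       y = A_d*x + B*x_off + B^T*x_d

   where A_d is the local (symmetric) diagonal block and B holds the strictly
   upper off-diagonal blocks; the B^T contribution travels through the single
   slvec0/slvec1 scatter with ADD_VALUES instead of a second VecScatter.
*/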
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from,zero=0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
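/*
   Usage sketch (hypothetical application code): the routines above implement
   z = y + A*x for MATMPISBAIJ through the standard interface,

     ierr = MatMultAdd(A,x,y,z);CHKERRQ(ierr);
*/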
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
1383 */ 1384 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1385 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1386 PetscInt max = 1,mbs = mat->mbs,tmp; 1387 for (i=0; i<mbs; i++) { 1388 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1389 if (max < tmp) max = tmp; 1390 } 1391 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1392 } 1393 1394 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1395 lrow = row - brstart; /* local row index */ 1396 1397 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1398 if (!v) {pvA = 0; pvB = 0;} 1399 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1400 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1401 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1402 nztot = nzA + nzB; 1403 1404 cmap = mat->garray; 1405 if (v || idx) { 1406 if (nztot) { 1407 /* Sort by increasing column numbers, assuming A and B already sorted */ 1408 PetscInt imark = -1; 1409 if (v) { 1410 *v = v_p = mat->rowvalues; 1411 for (i=0; i<nzB; i++) { 1412 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1413 else break; 1414 } 1415 imark = i; 1416 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1417 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1418 } 1419 if (idx) { 1420 *idx = idx_p = mat->rowindices; 1421 if (imark > -1) { 1422 for (i=0; i<imark; i++) { 1423 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1424 } 1425 } else { 1426 for (i=0; i<nzB; i++) { 1427 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1428 else break; 1429 } 1430 imark = i; 1431 } 1432 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1433 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1434 } 1435 } else { 1436 if (idx) *idx = 0; 1437 if (v) *v = 0; 1438 } 1439 } 1440 *nz = nztot; 1441 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1442 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1443 PetscFunctionReturn(0); 1444 } 1445 1446 #undef __FUNCT__ 1447 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1448 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1449 { 1450 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1451 1452 PetscFunctionBegin; 1453 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1454 baij->getrowactive = PETSC_FALSE; 1455 PetscFunctionReturn(0); 1456 } 1457 1458 #undef __FUNCT__ 1459 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1460 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1461 { 1462 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1463 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1464 1465 PetscFunctionBegin; 1466 aA->getrow_utriangular = PETSC_TRUE; 1467 PetscFunctionReturn(0); 1468 } 1469 #undef __FUNCT__ 1470 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1471 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1472 { 1473 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1474 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1475 1476 PetscFunctionBegin; 1477 aA->getrow_utriangular = PETSC_FALSE; 1478 PetscFunctionReturn(0); 1479 } 1480 1481 #undef __FUNCT__ 1482 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1483 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1484 { 1485 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1486 PetscErrorCode ierr; 1487 1488 PetscFunctionBegin; 1489 ierr = 
MatRealPart(a->A);CHKERRQ(ierr); 1490 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1491 PetscFunctionReturn(0); 1492 } 1493 1494 #undef __FUNCT__ 1495 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1496 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1497 { 1498 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1499 PetscErrorCode ierr; 1500 1501 PetscFunctionBegin; 1502 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1503 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1504 PetscFunctionReturn(0); 1505 } 1506 1507 /* Check if isrow is a subset of iscol_local, called by MatGetSubMatrix_MPISBAIJ() 1508 Input: isrow - distributed(parallel), 1509 iscol_local - locally owned (seq) 1510 */ 1511 #undef __FUNCT__ 1512 #define __FUNCT__ "ISEqual_private" 1513 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg) 1514 { 1515 PetscErrorCode ierr; 1516 PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch; 1517 const PetscInt *ptr1,*ptr2; 1518 1519 PetscFunctionBegin; 1520 ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr); 1521 ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr); 1522 if (sz1 > sz2) { 1523 *flg = PETSC_FALSE; 1524 PetscFunctionReturn(0); 1525 } 1526 1527 ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr); 1528 ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1529 1530 ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr); 1531 ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr); 1532 ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr); 1533 ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr); 1534 ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr); 1535 ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr); 1536 1537 nmatch=0; 1538 k = 0; 1539 for (i=0; i<sz1; i++){ 1540 for (j=k; j<sz2; j++){ 1541 if (a1[i] == a2[j]) { 1542 k = j; nmatch++; 1543 break; 1544 } 1545 } 1546 } 1547 ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr); 1548 ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1549 ierr = PetscFree(a1);CHKERRQ(ierr); 1550 ierr = PetscFree(a2);CHKERRQ(ierr); 1551 if (nmatch < sz1) { 1552 *flg = PETSC_FALSE; 1553 } else { 1554 *flg = PETSC_TRUE; 1555 } 1556 PetscFunctionReturn(0); 1557 } 1558 1559 #undef __FUNCT__ 1560 #define __FUNCT__ "MatGetSubMatrix_MPISBAIJ" 1561 PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1562 { 1563 PetscErrorCode ierr; 1564 IS iscol_local; 1565 PetscInt csize; 1566 PetscBool isequal; 1567 1568 PetscFunctionBegin; 1569 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1570 if (call == MAT_REUSE_MATRIX) { 1571 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1572 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1573 } else { 1574 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1575 ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr); 1576 if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow"); 1577 } 1578 1579 /* now call MatGetSubMatrix_MPIBAIJ() */ 1580 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1581 if (call == MAT_INITIAL_MATRIX) { 1582 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1583 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1584 } 1585 PetscFunctionReturn(0); 1586 } 1587 1588 #undef __FUNCT__ 1589 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1590 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1591 { 1592 Mat_MPISBAIJ *l = 
(Mat_MPISBAIJ*)A->data; 1593 PetscErrorCode ierr; 1594 1595 PetscFunctionBegin; 1596 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1597 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1598 PetscFunctionReturn(0); 1599 } 1600 1601 #undef __FUNCT__ 1602 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1603 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1604 { 1605 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1606 Mat A = a->A,B = a->B; 1607 PetscErrorCode ierr; 1608 PetscReal isend[5],irecv[5]; 1609 1610 PetscFunctionBegin; 1611 info->block_size = (PetscReal)matin->rmap->bs; 1612 1613 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1614 1615 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1616 isend[3] = info->memory; isend[4] = info->mallocs; 1617 1618 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1619 1620 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1621 isend[3] += info->memory; isend[4] += info->mallocs; 1622 if (flag == MAT_LOCAL) { 1623 info->nz_used = isend[0]; 1624 info->nz_allocated = isend[1]; 1625 info->nz_unneeded = isend[2]; 1626 info->memory = isend[3]; 1627 info->mallocs = isend[4]; 1628 } else if (flag == MAT_GLOBAL_MAX) { 1629 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1630 1631 info->nz_used = irecv[0]; 1632 info->nz_allocated = irecv[1]; 1633 info->nz_unneeded = irecv[2]; 1634 info->memory = irecv[3]; 1635 info->mallocs = irecv[4]; 1636 } else if (flag == MAT_GLOBAL_SUM) { 1637 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1638 1639 info->nz_used = irecv[0]; 1640 info->nz_allocated = irecv[1]; 1641 info->nz_unneeded = irecv[2]; 1642 info->memory = irecv[3]; 1643 info->mallocs = irecv[4]; 1644 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1645 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1646 info->fill_ratio_needed = 0; 1647 info->factor_mallocs = 0; 1648 PetscFunctionReturn(0); 1649 } 1650 1651 #undef __FUNCT__ 1652 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1653 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1654 { 1655 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1656 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1657 PetscErrorCode ierr; 1658 1659 PetscFunctionBegin; 1660 switch (op) { 1661 case MAT_NEW_NONZERO_LOCATIONS: 1662 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1663 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1664 case MAT_KEEP_NONZERO_PATTERN: 1665 case MAT_SUBMAT_SINGLEIS: 1666 case MAT_NEW_NONZERO_LOCATION_ERR: 1667 MatCheckPreallocated(A,1); 1668 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1669 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1670 break; 1671 case MAT_ROW_ORIENTED: 1672 MatCheckPreallocated(A,1); 1673 a->roworiented = flg; 1674 1675 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1676 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1677 break; 1678 case MAT_NEW_DIAGONALS: 1679 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1680 break; 1681 case MAT_IGNORE_OFF_PROC_ENTRIES: 1682 a->donotstash = flg; 1683 break; 1684 case MAT_USE_HASH_TABLE: 1685 a->ht_flag = flg; 1686 break; 1687 case MAT_HERMITIAN: 1688 MatCheckPreallocated(A,1); 1689 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1690 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1691 1692 
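/*
   Only the upper triangle of each block row is stored, so y = A*x is assembled from the stored blocks plus their transposed contributions. For a complex Hermitian matrix the transposed part must also be conjugated, which is what the multiply kernel installed below is for. A minimal usage sketch (A an already assembled complex MPISBAIJ matrix; x and y are placeholder vectors obtained from MatCreateVecs()):

     ierr = MatSetOption(A,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);
*/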
A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1693 break; 1694 case MAT_SPD: 1695 A->spd_set = PETSC_TRUE; 1696 A->spd = flg; 1697 if (flg) { 1698 A->symmetric = PETSC_TRUE; 1699 A->structurally_symmetric = PETSC_TRUE; 1700 A->symmetric_set = PETSC_TRUE; 1701 A->structurally_symmetric_set = PETSC_TRUE; 1702 } 1703 break; 1704 case MAT_SYMMETRIC: 1705 MatCheckPreallocated(A,1); 1706 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1707 break; 1708 case MAT_STRUCTURALLY_SYMMETRIC: 1709 MatCheckPreallocated(A,1); 1710 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1711 break; 1712 case MAT_SYMMETRY_ETERNAL: 1713 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1714 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1715 break; 1716 case MAT_IGNORE_LOWER_TRIANGULAR: 1717 aA->ignore_ltriangular = flg; 1718 break; 1719 case MAT_ERROR_LOWER_TRIANGULAR: 1720 aA->ignore_ltriangular = flg; 1721 break; 1722 case MAT_GETROW_UPPERTRIANGULAR: 1723 aA->getrow_utriangular = flg; 1724 break; 1725 default: 1726 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1727 } 1728 PetscFunctionReturn(0); 1729 } 1730 1731 #undef __FUNCT__ 1732 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1733 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1734 { 1735 PetscErrorCode ierr; 1736 1737 PetscFunctionBegin; 1738 if (reuse == MAT_INITIAL_MATRIX || *B != A) { /* a symmetric matrix equals its transpose, so duplicate unless B already aliases A */ 1739 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1740 } 1741 PetscFunctionReturn(0); 1742 } 1743 1744 #undef __FUNCT__ 1745 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1746 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1747 { 1748 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1749 Mat a = baij->A, b=baij->B; 1750 PetscErrorCode ierr; 1751 PetscInt nv,m,n; 1752 PetscBool flg; 1753 1754 PetscFunctionBegin; 1755 if (ll != rr) { 1756 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1757 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same\n"); 1758 } 1759 if (!ll) PetscFunctionReturn(0); 1760 1761 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1762 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be the same",m,n); 1763 1764 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1765 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1766 1767 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1768 1769 /* left diagonal-scale the off-diagonal part */ 1770 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1771 1772 /* scale the diagonal part */ 1773 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1774 1775 /* right diagonal-scale the off-diagonal part */ 1776 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1777 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1778 PetscFunctionReturn(0); 1779 } 1780 1781 #undef __FUNCT__ 1782 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1783 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1784 { 1785 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1786 PetscErrorCode ierr; 1787 1788 PetscFunctionBegin; 1789 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1790 PetscFunctionReturn(0); 1791 } 1792 1793 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1794 1795 #undef __FUNCT__ 1796 #define __FUNCT__ "MatEqual_MPISBAIJ"
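/*
   MatEqual below compares the diagonal (A) and off-diagonal (B) blocks locally and then combines the local results with a logical-AND reduction, so every rank receives the same answer. A small usage sketch, assuming X and Y are assembled MPISBAIJ matrices on the same communicator:

     PetscBool eq;
     ierr = MatEqual(X,Y,&eq);CHKERRQ(ierr);
     if (!eq) { ierr = PetscPrintf(PetscObjectComm((PetscObject)X),"matrices differ\n");CHKERRQ(ierr); }
*/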
1797 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1798 { 1799 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1800 Mat a,b,c,d; 1801 PetscBool flg; 1802 PetscErrorCode ierr; 1803 1804 PetscFunctionBegin; 1805 a = matA->A; b = matA->B; 1806 c = matB->A; d = matB->B; 1807 1808 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1809 if (flg) { 1810 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1811 } 1812 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1813 PetscFunctionReturn(0); 1814 } 1815 1816 #undef __FUNCT__ 1817 #define __FUNCT__ "MatCopy_MPISBAIJ" 1818 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1819 { 1820 PetscErrorCode ierr; 1821 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1822 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1823 1824 PetscFunctionBegin; 1825 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1826 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1827 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1828 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1829 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1830 } else { 1831 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1832 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1833 } 1834 PetscFunctionReturn(0); 1835 } 1836 1837 #undef __FUNCT__ 1838 #define __FUNCT__ "MatSetUp_MPISBAIJ" 1839 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1840 { 1841 PetscErrorCode ierr; 1842 1843 PetscFunctionBegin; 1844 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1845 PetscFunctionReturn(0); 1846 } 1847 1848 #undef __FUNCT__ 1849 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1850 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1851 { 1852 PetscErrorCode ierr; 1853 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1854 PetscBLASInt bnz,one=1; 1855 Mat_SeqSBAIJ *xa,*ya; 1856 Mat_SeqBAIJ *xb,*yb; 1857 1858 PetscFunctionBegin; 1859 if (str == SAME_NONZERO_PATTERN) { 1860 PetscScalar alpha = a; 1861 xa = (Mat_SeqSBAIJ*)xx->A->data; 1862 ya = (Mat_SeqSBAIJ*)yy->A->data; 1863 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1864 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1865 xb = (Mat_SeqBAIJ*)xx->B->data; 1866 yb = (Mat_SeqBAIJ*)yy->B->data; 1867 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1868 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1869 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1870 } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of Y's */ 1871 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1872 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1873 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 1874 } else { 1875 Mat B; 1876 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1877 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1878 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1879 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 1880 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1881 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1882 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1883 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1884 ierr =
MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1885 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1886 ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 1887 ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1888 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1889 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1890 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1891 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 1892 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1893 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1894 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1895 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1896 } 1897 PetscFunctionReturn(0); 1898 } 1899 1900 #undef __FUNCT__ 1901 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1902 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1903 { 1904 PetscErrorCode ierr; 1905 PetscInt i; 1906 PetscBool flg; 1907 1908 PetscFunctionBegin; 1909 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */ 1910 for (i=0; i<n; i++) { 1911 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1912 if (!flg) { 1913 ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr); 1914 } 1915 } 1916 PetscFunctionReturn(0); 1917 } 1918 1919 #undef __FUNCT__ 1920 #define __FUNCT__ "MatShift_MPISBAIJ" 1921 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a) 1922 { 1923 PetscErrorCode ierr; 1924 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data; 1925 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data; 1926 1927 PetscFunctionBegin; 1928 if (!Y->preallocated) { 1929 ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 1930 } else if (!aij->nz) { 1931 PetscInt nonew = aij->nonew; 1932 ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 1933 aij->nonew = nonew; 1934 } 1935 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1936 PetscFunctionReturn(0); 1937 } 1938 1939 #undef __FUNCT__ 1940 #define __FUNCT__ "MatMissingDiagonal_MPISBAIJ" 1941 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1942 { 1943 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1944 PetscErrorCode ierr; 1945 1946 PetscFunctionBegin; 1947 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 1948 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 1949 if (d) { 1950 PetscInt rstart; 1951 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 1952 *d += rstart/A->rmap->bs; 1953 1954 } 1955 PetscFunctionReturn(0); 1956 } 1957 1958 #undef __FUNCT__ 1959 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1960 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1961 { 1962 PetscFunctionBegin; 1963 *a = ((Mat_MPISBAIJ*)A->data)->A; 1964 PetscFunctionReturn(0); 1965 } 1966 1967 /* -------------------------------------------------------------------*/ 1968 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1969 MatGetRow_MPISBAIJ, 1970 MatRestoreRow_MPISBAIJ, 1971 MatMult_MPISBAIJ, 1972 /* 4*/ MatMultAdd_MPISBAIJ, 1973 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1974 MatMultAdd_MPISBAIJ, 1975 0, 1976 0, 1977 0, 1978 /* 10*/ 0, 1979 0, 1980 0, 1981 MatSOR_MPISBAIJ, 1982 MatTranspose_MPISBAIJ, 1983 /* 15*/ MatGetInfo_MPISBAIJ, 1984 MatEqual_MPISBAIJ, 1985 MatGetDiagonal_MPISBAIJ, 1986 
MatDiagonalScale_MPISBAIJ, 1987 MatNorm_MPISBAIJ, 1988 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1989 MatAssemblyEnd_MPISBAIJ, 1990 MatSetOption_MPISBAIJ, 1991 MatZeroEntries_MPISBAIJ, 1992 /* 24*/ 0, 1993 0, 1994 0, 1995 0, 1996 0, 1997 /* 29*/ MatSetUp_MPISBAIJ, 1998 0, 1999 0, 2000 MatGetDiagonalBlock_MPISBAIJ, 2001 0, 2002 /* 34*/ MatDuplicate_MPISBAIJ, 2003 0, 2004 0, 2005 0, 2006 0, 2007 /* 39*/ MatAXPY_MPISBAIJ, 2008 MatGetSubMatrices_MPISBAIJ, 2009 MatIncreaseOverlap_MPISBAIJ, 2010 MatGetValues_MPISBAIJ, 2011 MatCopy_MPISBAIJ, 2012 /* 44*/ 0, 2013 MatScale_MPISBAIJ, 2014 MatShift_MPISBAIJ, 2015 0, 2016 0, 2017 /* 49*/ 0, 2018 0, 2019 0, 2020 0, 2021 0, 2022 /* 54*/ 0, 2023 0, 2024 MatSetUnfactored_MPISBAIJ, 2025 0, 2026 MatSetValuesBlocked_MPISBAIJ, 2027 /* 59*/ MatGetSubMatrix_MPISBAIJ, 2028 0, 2029 0, 2030 0, 2031 0, 2032 /* 64*/ 0, 2033 0, 2034 0, 2035 0, 2036 0, 2037 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 2038 0, 2039 0, 2040 0, 2041 0, 2042 /* 74*/ 0, 2043 0, 2044 0, 2045 0, 2046 0, 2047 /* 79*/ 0, 2048 0, 2049 0, 2050 0, 2051 MatLoad_MPISBAIJ, 2052 /* 84*/ 0, 2053 0, 2054 0, 2055 0, 2056 0, 2057 /* 89*/ 0, 2058 0, 2059 0, 2060 0, 2061 0, 2062 /* 94*/ 0, 2063 0, 2064 0, 2065 0, 2066 0, 2067 /* 99*/ 0, 2068 0, 2069 0, 2070 0, 2071 0, 2072 /*104*/ 0, 2073 MatRealPart_MPISBAIJ, 2074 MatImaginaryPart_MPISBAIJ, 2075 MatGetRowUpperTriangular_MPISBAIJ, 2076 MatRestoreRowUpperTriangular_MPISBAIJ, 2077 /*109*/ 0, 2078 0, 2079 0, 2080 0, 2081 MatMissingDiagonal_MPISBAIJ, 2082 /*114*/ 0, 2083 0, 2084 0, 2085 0, 2086 0, 2087 /*119*/ 0, 2088 0, 2089 0, 2090 0, 2091 0, 2092 /*124*/ 0, 2093 0, 2094 0, 2095 0, 2096 0, 2097 /*129*/ 0, 2098 0, 2099 0, 2100 0, 2101 0, 2102 /*134*/ 0, 2103 0, 2104 0, 2105 0, 2106 0, 2107 /*139*/ 0, 2108 0, 2109 0, 2110 0, 2111 0, 2112 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 2113 }; 2114 2115 #undef __FUNCT__ 2116 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 2117 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2118 { 2119 Mat_MPISBAIJ *b; 2120 PetscErrorCode ierr; 2121 PetscInt i,mbs,Mbs; 2122 2123 PetscFunctionBegin; 2124 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2125 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2126 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2127 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2128 2129 b = (Mat_MPISBAIJ*)B->data; 2130 mbs = B->rmap->n/bs; 2131 Mbs = B->rmap->N/bs; 2132 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 2133 2134 B->rmap->bs = bs; 2135 b->bs2 = bs*bs; 2136 b->mbs = mbs; 2137 b->Mbs = Mbs; 2138 b->nbs = B->cmap->n/bs; 2139 b->Nbs = B->cmap->N/bs; 2140 2141 for (i=0; i<=b->size; i++) { 2142 b->rangebs[i] = B->rmap->range[i]/bs; 2143 } 2144 b->rstartbs = B->rmap->rstart/bs; 2145 b->rendbs = B->rmap->rend/bs; 2146 2147 b->cstartbs = B->cmap->rstart/bs; 2148 b->cendbs = B->cmap->rend/bs; 2149 2150 #if defined(PETSC_USE_CTABLE) 2151 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2152 #else 2153 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2154 #endif 2155 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2156 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2157 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2158 ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr); 2159 ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr); 2160 ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr); 2161 ierr = 
VecDestroy(&b->slvec1a);CHKERRQ(ierr); 2162 ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr); 2163 ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr); 2164 2165 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2166 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2167 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2168 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2169 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2170 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2171 2172 if (!B->preallocated) { 2173 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2174 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2175 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 2176 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2177 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2178 } 2179 2180 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2181 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2182 2183 B->preallocated = PETSC_TRUE; 2184 B->was_assembled = PETSC_FALSE; 2185 B->assembled = PETSC_FALSE; 2186 PetscFunctionReturn(0); 2187 } 2188 2189 #undef __FUNCT__ 2190 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ" 2191 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2192 { 2193 PetscInt m,rstart,cstart,cend; 2194 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2195 const PetscInt *JJ =0; 2196 PetscScalar *values=0; 2197 PetscErrorCode ierr; 2198 2199 PetscFunctionBegin; 2200 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2201 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2202 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2203 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2204 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2205 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2206 m = B->rmap->n/bs; 2207 rstart = B->rmap->rstart/bs; 2208 cstart = B->cmap->rstart/bs; 2209 cend = B->cmap->rend/bs; 2210 2211 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2212 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2213 for (i=0; i<m; i++) { 2214 nz = ii[i+1] - ii[i]; 2215 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2216 nz_max = PetscMax(nz_max,nz); 2217 JJ = jj + ii[i]; 2218 for (j=0; j<nz; j++) { 2219 if (*JJ >= cstart) break; 2220 JJ++; 2221 } 2222 d = 0; 2223 for (; j<nz; j++) { 2224 if (*JJ++ >= cend) break; 2225 d++; 2226 } 2227 d_nnz[i] = d; 2228 o_nnz[i] = nz - d; 2229 } 2230 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2231 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2232 2233 values = (PetscScalar*)V; 2234 if (!values) { 2235 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2236 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2237 } 2238 for (i=0; i<m; i++) { 2239 PetscInt row = i + rstart; 2240 PetscInt ncols = ii[i+1] - ii[i]; 2241 const PetscInt *icols = jj + ii[i]; 2242 const PetscScalar *svals = values + (V ? 
(bs*bs*ii[i]) : 0); 2243 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2244 } 2245 2246 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2247 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2248 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2249 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2250 PetscFunctionReturn(0); 2251 } 2252 2253 /*MC 2254 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2255 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2256 the matrix is stored. 2257 2258 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2259 can call MatSetOption(Mat, MAT_HERMITIAN); 2260 2261 Options Database Keys: 2262 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 2263 2264 Level: beginner 2265 2266 .seealso: MatCreateMPISBAIJ 2267 M*/ 2268 2269 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*); 2270 2271 #undef __FUNCT__ 2272 #define __FUNCT__ "MatCreate_MPISBAIJ" 2273 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2274 { 2275 Mat_MPISBAIJ *b; 2276 PetscErrorCode ierr; 2277 PetscBool flg = PETSC_FALSE; 2278 2279 PetscFunctionBegin; 2280 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2281 B->data = (void*)b; 2282 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2283 2284 B->ops->destroy = MatDestroy_MPISBAIJ; 2285 B->ops->view = MatView_MPISBAIJ; 2286 B->assembled = PETSC_FALSE; 2287 B->insertmode = NOT_SET_VALUES; 2288 2289 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2290 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2291 2292 /* build local table of row and column ownerships */ 2293 ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 2294 2295 /* build cache for off array entries formed */ 2296 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2297 2298 b->donotstash = PETSC_FALSE; 2299 b->colmap = NULL; 2300 b->garray = NULL; 2301 b->roworiented = PETSC_TRUE; 2302 2303 /* stuff used in block assembly */ 2304 b->barray = 0; 2305 2306 /* stuff used for matrix vector multiply */ 2307 b->lvec = 0; 2308 b->Mvctx = 0; 2309 b->slvec0 = 0; 2310 b->slvec0b = 0; 2311 b->slvec1 = 0; 2312 b->slvec1a = 0; 2313 b->slvec1b = 0; 2314 b->sMvctx = 0; 2315 2316 /* stuff for MatGetRow() */ 2317 b->rowindices = 0; 2318 b->rowvalues = 0; 2319 b->getrowactive = PETSC_FALSE; 2320 2321 /* hash table stuff */ 2322 b->ht = 0; 2323 b->hd = 0; 2324 b->ht_size = 0; 2325 b->ht_flag = PETSC_FALSE; 2326 b->ht_fact = 0; 2327 b->ht_total_ct = 0; 2328 b->ht_insert_ct = 0; 2329 2330 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 2331 b->ijonly = PETSC_FALSE; 2332 2333 b->in_loc = 0; 2334 b->v_loc = 0; 2335 b->n_loc = 0; 2336 2337 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 2338 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 2339 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 2340 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 2341 #if defined(PETSC_HAVE_ELEMENTAL) 2342 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 2343 #endif 2344 2345 B->symmetric = PETSC_TRUE; 2346 B->structurally_symmetric = PETSC_TRUE; 2347 B->symmetric_set = PETSC_TRUE; 2348 B->structurally_symmetric_set = PETSC_TRUE; 2349 2350 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 2351 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 2352 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 2353 if (flg) { 2354 PetscReal fact = 1.39; 2355 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2356 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 2357 if (fact <= 1.0) fact = 1.39; 2358 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2359 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2360 } 2361 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2362 PetscFunctionReturn(0); 2363 } 2364 2365 /*MC 2366 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2367 2368 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 2369 and MATMPISBAIJ otherwise. 2370 2371 Options Database Keys: 2372 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2373 2374 Level: beginner 2375 2376 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 2377 M*/ 2378 2379 #undef __FUNCT__ 2380 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 2381 /*@C 2382 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2383 the user should preallocate the matrix storage by setting the parameters 2384 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2385 performance can be increased by more than a factor of 50. 2386 2387 Collective on Mat 2388 2389 Input Parameters: 2390 + B - the matrix 2391 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2392 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2393 . d_nz - number of block nonzeros per block row in diagonal portion of local 2394 submatrix (same for all local rows) 2395 . d_nnz - array containing the number of block nonzeros in the various block rows 2396 in the upper triangular and diagonal part of the diagonal portion of the local 2397 submatrix (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 2398 for the diagonal entry and set a value even if it is zero. 2399 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2400 submatrix (same for all local rows). 2401 - o_nnz - array containing the number of nonzeros in the various block rows of the 2402 off-diagonal portion of the local submatrix that is right of the diagonal 2403 (possibly different for each block row) or NULL. 2404 2405 2406 Options Database Keys: 2407 .
-mat_no_unroll - uses code that does not unroll the loops in the 2408 block calculations (much slower) 2409 . -mat_block_size - size of the blocks to use 2410 2411 Notes: 2412 2413 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2414 then it must be used on all processors that share the object for that argument. 2415 2416 If the *_nnz parameter is given then the *_nz parameter is ignored. 2417 2418 Storage Information: 2419 For a square global matrix we define each processor's diagonal portion 2420 to be its local rows and the corresponding columns (a square submatrix); 2421 each processor's off-diagonal portion encompasses the remainder of the 2422 local matrix (a rectangular submatrix). 2423 2424 The user can specify preallocated storage for the diagonal part of 2425 the local submatrix with either d_nz or d_nnz (not both). Set 2426 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2427 memory allocation. Likewise, specify preallocated storage for the 2428 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2429 2430 You can call MatGetInfo() to get information on how effective the preallocation was; 2431 for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 2432 You can also run with the option -info and look for messages with the string 2433 malloc in them to see if additional memory allocation was needed. 2434 2435 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2436 the figure below we depict these three local rows and all columns (0-11). 2437 2438 .vb 2439 0 1 2 3 4 5 6 7 8 9 10 11 2440 -------------------------- 2441 row 3 |. . . d d d o o o o o o 2442 row 4 |. . . d d d o o o o o o 2443 row 5 |. . . d d d o o o o o o 2444 -------------------------- 2445 .ve 2446 2447 Thus, any entries in the d locations are stored in the d (diagonal) 2448 submatrix, and any entries in the o locations are stored in the 2449 o (off-diagonal) submatrix. Note that the d matrix is stored in 2450 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2451 2452 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2453 plus the diagonal part of the d matrix, 2454 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2455 2456 In general, for PDE problems in which most nonzeros are near the diagonal, 2457 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2458 or you will get TERRIBLE performance; see the users' manual chapter on 2459 matrices. 2460 2461 Level: intermediate 2462 2463 .keywords: matrix, block, aij, compressed row, sparse, parallel 2464 2465 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 2466 @*/ 2467 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2468 { 2469 PetscErrorCode ierr; 2470 2471 PetscFunctionBegin; 2472 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2473 PetscValidType(B,1); 2474 PetscValidLogicalCollectiveInt(B,bs,2); 2475 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 2476 PetscFunctionReturn(0); 2477 } 2478 2479 #undef __FUNCT__ 2480 #define __FUNCT__ "MatCreateSBAIJ" 2481 /*@C 2482 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2483 (block compressed row).
For good matrix assembly performance 2484 the user should preallocate the matrix storage by setting the parameters 2485 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2486 performance can be increased by more than a factor of 50. 2487 2488 Collective on MPI_Comm 2489 2490 Input Parameters: 2491 + comm - MPI communicator 2492 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2493 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2494 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2495 This value should be the same as the local size used in creating the 2496 y vector for the matrix-vector product y = Ax. 2497 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2498 This value should be the same as the local size used in creating the 2499 x vector for the matrix-vector product y = Ax. 2500 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2501 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2502 . d_nz - number of block nonzeros per block row in diagonal portion of local 2503 submatrix (same for all local rows) 2504 . d_nnz - array containing the number of block nonzeros in the various block rows 2505 in the upper triangular portion of the diagonal portion of the local 2506 submatrix (possibly different for each block row) or NULL. 2507 If you plan to factor the matrix you must leave room for the diagonal entry and 2508 set its value even if it is zero. 2509 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2510 submatrix (same for all local rows). 2511 - o_nnz - array containing the number of nonzeros in the various block rows of the 2512 off-diagonal portion of the local submatrix (possibly different for 2513 each block row) or NULL. 2514 2515 Output Parameter: 2516 . A - the matrix 2517 2518 Options Database Keys: 2519 . -mat_no_unroll - uses code that does not unroll the loops in the 2520 block calculations (much slower) 2521 . -mat_block_size - size of the blocks to use 2522 . -mat_mpi - use the parallel matrix data structures even on one processor 2523 (defaults to using SeqBAIJ format on one processor) 2524 2525 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2526 MatXXXXSetPreallocation() paradigm instead of this routine directly. 2527 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2528 2529 Notes: 2530 The number of rows and columns must be divisible by blocksize. 2531 This matrix type does not support complex Hermitian operations. 2532 2533 The user MUST specify either the local or global matrix dimensions 2534 (possibly both). 2535 2536 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2537 then it must be used on all processors that share the object for that argument. 2538 2539 If the *_nnz parameter is given then the *_nz parameter is ignored. 2540 2541 Storage Information: 2542 For a square global matrix we define each processor's diagonal portion 2543 to be its local rows and the corresponding columns (a square submatrix); 2544 each processor's off-diagonal portion encompasses the remainder of the 2545 local matrix (a rectangular submatrix).
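   As a sketch only (comm and M here are placeholders, and the counts depend on the
   actual coupling), a matrix with 2x2 blocks in which each block row couples its
   diagonal block and one off-processor neighbour block might be created with
.vb
     Mat A;
     ierr = MatCreateSBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,M,M,2,NULL,1,NULL,&A);CHKERRQ(ierr);
.ve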
2546 2547 The user can specify preallocated storage for the diagonal part of 2548 the local submatrix with either d_nz or d_nnz (not both). Set 2549 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2550 memory allocation. Likewise, specify preallocated storage for the 2551 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2552 2553 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2554 the figure below we depict these three local rows and all columns (0-11). 2555 2556 .vb 2557 0 1 2 3 4 5 6 7 8 9 10 11 2558 -------------------------- 2559 row 3 |. . . d d d o o o o o o 2560 row 4 |. . . d d d o o o o o o 2561 row 5 |. . . d d d o o o o o o 2562 -------------------------- 2563 .ve 2564 2565 Thus, any entries in the d locations are stored in the d (diagonal) 2566 submatrix, and any entries in the o locations are stored in the 2567 o (off-diagonal) submatrix. Note that the d matrix is stored in 2568 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2569 2570 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2571 plus the diagonal part of the d matrix, 2572 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2573 In general, for PDE problems in which most nonzeros are near the diagonal, 2574 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2575 or you will get TERRIBLE performance; see the users' manual chapter on 2576 matrices. 2577 2578 Level: intermediate 2579 2580 .keywords: matrix, block, aij, compressed row, sparse, parallel 2581 2582 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2583 @*/ 2584 2585 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2586 { 2587 PetscErrorCode ierr; 2588 PetscMPIInt size; 2589 2590 PetscFunctionBegin; 2591 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2592 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2593 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2594 if (size > 1) { 2595 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2596 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2597 } else { 2598 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2599 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2600 } 2601 PetscFunctionReturn(0); 2602 } 2603 2604 2605 #undef __FUNCT__ 2606 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2607 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2608 { 2609 Mat mat; 2610 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2611 PetscErrorCode ierr; 2612 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2613 PetscScalar *array; 2614 2615 PetscFunctionBegin; 2616 *newmat = 0; 2617 2618 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2619 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2620 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2621 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2622 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2623 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2624 2625 mat->factortype = matin->factortype; 2626 mat->preallocated = PETSC_TRUE; 2627 mat->assembled = PETSC_TRUE; 2628 mat->insertmode = NOT_SET_VALUES; 2629 2630 a = 
(Mat_MPISBAIJ*)mat->data; 2631 a->bs2 = oldmat->bs2; 2632 a->mbs = oldmat->mbs; 2633 a->nbs = oldmat->nbs; 2634 a->Mbs = oldmat->Mbs; 2635 a->Nbs = oldmat->Nbs; 2636 2637 2638 a->size = oldmat->size; 2639 a->rank = oldmat->rank; 2640 a->donotstash = oldmat->donotstash; 2641 a->roworiented = oldmat->roworiented; 2642 a->rowindices = 0; 2643 a->rowvalues = 0; 2644 a->getrowactive = PETSC_FALSE; 2645 a->barray = 0; 2646 a->rstartbs = oldmat->rstartbs; 2647 a->rendbs = oldmat->rendbs; 2648 a->cstartbs = oldmat->cstartbs; 2649 a->cendbs = oldmat->cendbs; 2650 2651 /* hash table stuff */ 2652 a->ht = 0; 2653 a->hd = 0; 2654 a->ht_size = 0; 2655 a->ht_flag = oldmat->ht_flag; 2656 a->ht_fact = oldmat->ht_fact; 2657 a->ht_total_ct = 0; 2658 a->ht_insert_ct = 0; 2659 2660 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2661 if (oldmat->colmap) { 2662 #if defined(PETSC_USE_CTABLE) 2663 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2664 #else 2665 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 2666 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2667 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2668 #endif 2669 } else a->colmap = 0; 2670 2671 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2672 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2673 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2674 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2675 } else a->garray = 0; 2676 2677 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2678 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2679 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2680 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2681 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2682 2683 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2684 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2685 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2686 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2687 2688 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2689 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2690 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2691 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2692 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2693 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2694 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2695 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2696 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2697 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2698 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 2699 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 2700 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2701 2702 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2703 ierr = 
PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2704 a->sMvctx = oldmat->sMvctx; 2705 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 2706 2707 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2708 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2709 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2710 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2711 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2712 *newmat = mat; 2713 PetscFunctionReturn(0); 2714 } 2715 2716 #undef __FUNCT__ 2717 #define __FUNCT__ "MatLoad_MPISBAIJ" 2718 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2719 { 2720 PetscErrorCode ierr; 2721 PetscInt i,nz,j,rstart,rend; 2722 PetscScalar *vals,*buf; 2723 MPI_Comm comm; 2724 MPI_Status status; 2725 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2726 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2727 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2728 PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows; 2729 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2730 PetscInt dcount,kmax,k,nzcount,tmp; 2731 int fd; 2732 2733 PetscFunctionBegin; 2734 /* force binary viewer to load .info file if it has not yet done so */ 2735 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2736 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2737 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2738 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2739 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2740 if (bs < 0) bs = 1; 2741 2742 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2743 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2744 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2745 if (!rank) { 2746 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2747 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2748 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2749 } 2750 2751 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2752 M = header[1]; 2753 N = header[2]; 2754 2755 /* If global sizes are set, check if they are consistent with that given in the file */ 2756 if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M); 2757 if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N); 2758 2759 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2760 2761 /* 2762 This code adds extra rows to make sure the number of rows is 2763 divisible by the blocksize 2764 */ 2765 Mbs = M/bs; 2766 extra_rows = bs - M + bs*(Mbs); 2767 if (extra_rows == bs) extra_rows = 0; 2768 else Mbs++; 2769 if (extra_rows &&!rank) { 2770 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2771 } 2772 2773 /* determine ownership of all rows */ 2774 if (newmat->rmap->n < 
0) { /* PETSC_DECIDE */ 2775 mbs = Mbs/size + ((Mbs % size) > rank); 2776 m = mbs*bs; 2777 } else { /* User Set */ 2778 m = newmat->rmap->n; 2779 mbs = m/bs; 2780 } 2781 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 2782 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2783 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2784 rowners[0] = 0; 2785 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2786 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2787 rstart = rowners[rank]; 2788 rend = rowners[rank+1]; 2789 2790 /* distribute row lengths to all processors */ 2791 ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 2792 if (!rank) { 2793 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 2794 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2795 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2796 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 2797 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2798 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2799 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2800 } else { 2801 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2802 } 2803 2804 if (!rank) { /* procs[0] */ 2805 /* calculate the number of nonzeros on each processor */ 2806 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 2807 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2808 for (i=0; i<size; i++) { 2809 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2810 procsnz[i] += rowlengths[j]; 2811 } 2812 } 2813 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2814 2815 /* determine max buffer needed and allocate it */ 2816 maxnz = 0; 2817 for (i=0; i<size; i++) { 2818 maxnz = PetscMax(maxnz,procsnz[i]); 2819 } 2820 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2821 2822 /* read in my part of the matrix column indices */ 2823 nz = procsnz[0]; 2824 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2825 mycols = ibuf; 2826 if (size == 1) nz -= extra_rows; 2827 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2828 if (size == 1) { 2829 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 2830 } 2831 2832 /* read in every ones (except the last) and ship off */ 2833 for (i=1; i<size-1; i++) { 2834 nz = procsnz[i]; 2835 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2836 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2837 } 2838 /* read in the stuff for the last proc */ 2839 if (size != 1) { 2840 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2841 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2842 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2843 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2844 } 2845 ierr = PetscFree(cols);CHKERRQ(ierr); 2846 } else { /* procs[i], i>0 */ 2847 /* determine buffer space needed for message */ 2848 nz = 0; 2849 for (i=0; i<m; i++) nz += locrowlens[i]; 2850 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2851 mycols = ibuf; 2852 /* receive message of column indices*/ 2853 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2854 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2855 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2856 } 2857 2858 /* loop over local rows, determining number of off diagonal entries */ 2859 ierr = 
PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 2860 ierr = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 2861 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2862 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2863 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2864 rowcount = 0; 2865 nzcount = 0; 2866 for (i=0; i<mbs; i++) { 2867 dcount = 0; 2868 odcount = 0; 2869 for (j=0; j<bs; j++) { 2870 kmax = locrowlens[rowcount]; 2871 for (k=0; k<kmax; k++) { 2872 tmp = mycols[nzcount++]/bs; /* block col. index */ 2873 if (!mask[tmp]) { 2874 mask[tmp] = 1; 2875 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2876 else masked1[dcount++] = tmp; /* entry in diag portion */ 2877 } 2878 } 2879 rowcount++; 2880 } 2881 2882 dlens[i] = dcount; /* d_nzz[i] */ 2883 odlens[i] = odcount; /* o_nzz[i] */ 2884 2885 /* zero out the mask elements we set */ 2886 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2887 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2888 } 2889 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2890 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2891 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2892 2893 if (!rank) { 2894 ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 2895 /* read in my part of the matrix numerical values */ 2896 nz = procsnz[0]; 2897 vals = buf; 2898 mycols = ibuf; 2899 if (size == 1) nz -= extra_rows; 2900 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2901 if (size == 1) { 2902 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 2903 } 2904 2905 /* insert into matrix */ 2906 jj = rstart*bs; 2907 for (i=0; i<m; i++) { 2908 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2909 mycols += locrowlens[i]; 2910 vals += locrowlens[i]; 2911 jj++; 2912 } 2913 2914 /* read in other processors (except the last one) and ship out */ 2915 for (i=1; i<size-1; i++) { 2916 nz = procsnz[i]; 2917 vals = buf; 2918 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2919 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2920 } 2921 /* the last proc */ 2922 if (size != 1) { 2923 nz = procsnz[i] - extra_rows; 2924 vals = buf; 2925 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2926 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2927 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2928 } 2929 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2930 2931 } else { 2932 /* receive numeric values */ 2933 ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 2934 2935 /* receive message of values*/ 2936 vals = buf; 2937 mycols = ibuf; 2938 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2939 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2940 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2941 2942 /* insert into matrix */ 2943 jj = rstart*bs; 2944 for (i=0; i<m; i++) { 2945 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2946 mycols += locrowlens[i]; 2947 vals += locrowlens[i]; 2948 jj++; 2949 } 2950 } 2951 2952 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2953 ierr = PetscFree(buf);CHKERRQ(ierr); 2954 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2955 ierr = 
PetscFree2(rowners,browners);CHKERRQ(ierr); 2956 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2957 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2958 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2959 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2960 PetscFunctionReturn(0); 2961 } 2962 2963 #undef __FUNCT__ 2964 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2965 /*XXXXX@ 2966 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2967 2968 Input Parameters: 2969 . mat - the matrix 2970 . fact - factor 2971 2972 Not Collective on Mat, each process can have a different hash factor 2973 2974 Level: advanced 2975 2976 Notes: 2977 This can also be set by the command line option: -mat_use_hash_table fact 2978 2979 .keywords: matrix, hashtable, factor, HT 2980 2981 .seealso: MatSetOption() 2982 @XXXXX*/ 2983 2984 2985 #undef __FUNCT__ 2986 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2987 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2988 { 2989 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2990 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2991 PetscReal atmp; 2992 PetscReal *work,*svalues,*rvalues; 2993 PetscErrorCode ierr; 2994 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2995 PetscMPIInt rank,size; 2996 PetscInt *rowners_bs,dest,count,source; 2997 PetscScalar *va; 2998 MatScalar *ba; 2999 MPI_Status stat; 3000 3001 PetscFunctionBegin; 3002 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 3003 ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 3004 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 3005 3006 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3007 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3008 3009 bs = A->rmap->bs; 3010 mbs = a->mbs; 3011 Mbs = a->Mbs; 3012 ba = b->a; 3013 bi = b->i; 3014 bj = b->j; 3015 3016 /* find ownerships */ 3017 rowners_bs = A->rmap->range; 3018 3019 /* each proc creates an array to be distributed */ 3020 ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr); 3021 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 3022 3023 /* row_max for B */ 3024 if (rank != size-1) { 3025 for (i=0; i<mbs; i++) { 3026 ncols = bi[1] - bi[0]; bi++; 3027 brow = bs*i; 3028 for (j=0; j<ncols; j++) { 3029 bcol = bs*(*bj); 3030 for (kcol=0; kcol<bs; kcol++) { 3031 col = bcol + kcol; /* local col index */ 3032 col += rowners_bs[rank+1]; /* global col index */ 3033 for (krow=0; krow<bs; krow++) { 3034 atmp = PetscAbsScalar(*ba); ba++; 3035 row = brow + krow; /* local row index */ 3036 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 3037 if (work[col] < atmp) work[col] = atmp; 3038 } 3039 } 3040 bj++; 3041 } 3042 } 3043 3044 /* send values to its owners */ 3045 for (dest=rank+1; dest<size; dest++) { 3046 svalues = work + rowners_bs[dest]; 3047 count = rowners_bs[dest+1]-rowners_bs[dest]; 3048 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 3049 } 3050 } 3051 3052 /* receive values */ 3053 if (rank) { 3054 rvalues = work; 3055 count = rowners_bs[rank+1]-rowners_bs[rank]; 3056 for (source=0; source<rank; source++) { 3057 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 3058 /* process values */ 3059 for (i=0; i<count; i++) { 3060 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 3061 } 3062 } 3063 } 3064 3065 ierr = 
VecRestoreArray(v,&va);CHKERRQ(ierr); 3066 ierr = PetscFree(work);CHKERRQ(ierr); 3067 PetscFunctionReturn(0); 3068 } 3069 3070 #undef __FUNCT__ 3071 #define __FUNCT__ "MatSOR_MPISBAIJ" 3072 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 3073 { 3074 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 3075 PetscErrorCode ierr; 3076 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 3077 PetscScalar *x,*ptr,*from; 3078 Vec bb1; 3079 const PetscScalar *b; 3080 3081 PetscFunctionBegin; 3082 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 3083 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 3084 3085 if (flag == SOR_APPLY_UPPER) { 3086 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3087 PetscFunctionReturn(0); 3088 } 3089 3090 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 3091 if (flag & SOR_ZERO_INITIAL_GUESS) { 3092 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 3093 its--; 3094 } 3095 3096 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 3097 while (its--) { 3098 3099 /* lower triangular part: slvec0b = - B^T*xx */ 3100 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 3101 3102 /* copy xx into slvec0a */ 3103 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 3104 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3105 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 3106 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 3107 3108 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 3109 3110 /* copy bb into slvec1a */ 3111 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 3112 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3113 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 3114 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 3115 3116 /* set slvec1b = 0 */ 3117 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 3118 3119 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3120 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3121 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3122 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3123 3124 /* upper triangular part: bb1 = bb1 - B*x */ 3125 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 3126 3127 /* local diagonal sweep */ 3128 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 3129 } 3130 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 3131 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 3132 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3133 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 3134 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3135 } else if (flag & SOR_EISENSTAT) { 3136 Vec xx1; 3137 PetscBool hasop; 3138 const PetscScalar *diag; 3139 PetscScalar *sl,scale = (omega - 2.0)/omega; 3140 PetscInt i,n; 3141 3142 if (!mat->xx1) { 3143 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 3144 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 3145 } 3146 xx1 = mat->xx1; 3147 bb1 = mat->bb1; 3148 3149 ierr = 
#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower triangular part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}
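/*
   Design note: MatSOR_MPISBAIJ above folds both halves of the symmetric
   off-diagonal update into a single forward scatter on mat->sMvctx (slvec0
   carries -x and -B^T*x together into slvec1, which already holds bb),
   whereas this _2comm variant performs two separate scatters on mat->Mvctx:
   a forward scatter of xx into mat->lvec for the B*x term and a reverse
   scatter of lvec1 into bb1 for the B^T*x term.  The single-communication
   scheme halves the number of messages per sweep.
*/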
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJWithArrays"
/*@
   MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - the block size, only a block size of 1 is supported
.  m    - number of local rows (Cannot be PETSC_DECIDE)
.  n    - This value should be the same as the local size used in creating the
          x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
          calculated if N is given). For square matrices n is almost always m.
.  M    - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N    - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i    - row indices
.  j    - column indices
-  a    - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the i indices are offsets into the local j array.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
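/*
   Example (sketch): assembling two local rows of an identity matrix with
   bs = 1, where each row stores only its diagonal entry (upper triangle).
   The j[] values shown are correct on rank 0 only; in general j[] holds
   global column indices, so they would be offset by this process's first
   global row:

     PetscInt    i[] = {0,1,2};    row offsets into j (starts with 0)
     PetscInt    j[] = {0,1};      global column indices
     PetscScalar a[] = {1.0,1.0};
     Mat         mat;
     ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&mat);CHKERRQ(ierr);
*/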
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
/*@C
   MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format.

   Collective on MPI_Comm

   Input Parameters:
+  B  - the matrix
.  bs - the block size
.  i  - the indices into j for the start of each local row (starts with zero)
.  j  - the column indices for each local row (starts with zero); these must be sorted for each row
-  v  - optional values in the matrix

   Level: developer

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
@*/
PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
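/*
   A typical call sequence (sketch), mirroring what MatCreateMPISBAIJWithArrays()
   does above: create the matrix, set its type and sizes, then hand over the
   CSR pattern.  Here bi/bj are assumed to describe the upper-triangular
   structure of the local rows:

     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocationCSR(B,bs,bi,bj,NULL);CHKERRQ(ierr);

   Passing NULL for v preallocates the pattern without inserting values,
   since this routine treats v as optional.
*/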
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data;
    PetscInt     *dnz,*onz,sum,bs,cbs,mbs,Nbs;
    PetscInt     *bindx,rmax=a->rmax,j;

    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    mbs  = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
    }
    /* Check sum(n) = Nbs */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",Nbs);

    ierr    = MPI_Scan(&mbs,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= mbs;

    ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
    ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<mbs; i++) {
      ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
      nnz  = nnz/bs;
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
    }
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscFree(bindx);CHKERRQ(ierr);

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);

  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
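/*
   The routine above is two-phase.  On MAT_INITIAL_MATRIX it first scans the
   sequential matrix block row by block row, with MAT_GETROW_UPPERTRIANGULAR
   enabled so MatGetRow_SeqSBAIJ() may return just the stored upper-triangular
   part, to count diagonal and off-diagonal block nonzeros for preallocation.
   The numeric phase then copies every local row into the parallel matrix with
   MatSetValues().  On MAT_REUSE_MATRIX only the numeric phase runs, reusing
   the previously preallocated *outmat.
*/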