1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 #if defined(PETSC_HAVE_ELEMENTAL) 8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 9 #endif 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 12 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 13 { 14 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 15 PetscErrorCode ierr; 16 17 PetscFunctionBegin; 18 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 19 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 20 PetscFunctionReturn(0); 21 } 22 23 #undef __FUNCT__ 24 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 25 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 26 { 27 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 28 PetscErrorCode ierr; 29 30 PetscFunctionBegin; 31 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 32 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 33 PetscFunctionReturn(0); 34 } 35 36 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 37 { \ 38 \ 39 brow = row/bs; \ 40 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 41 rmax = aimax[brow]; nrow = ailen[brow]; \ 42 bcol = col/bs; \ 43 ridx = row % bs; cidx = col % bs; \ 44 low = 0; high = nrow; \ 45 while (high-low > 3) { \ 46 t = (low+high)/2; \ 47 if (rp[t] > bcol) high = t; \ 48 else low = t; \ 49 } \ 50 for (_i=low; _i<high; _i++) { \ 51 if (rp[_i] > bcol) break; \ 52 if (rp[_i] == bcol) { \ 53 bap = ap + bs2*_i + bs*cidx + ridx; \ 54 if (addv == ADD_VALUES) *bap += value; \ 55 else *bap = value; \ 56 goto a_noinsert; \ 57 } \ 58 } \ 59 if (a->nonew == 1) goto a_noinsert; \ 60 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 61 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 62 N = nrow++ - 1; \ 63 /* shift up all the later entries in this row */ \ 64 for (ii=N; ii>=_i; ii--) { \ 65 rp[ii+1] = rp[ii]; \ 66 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 67 } \ 68 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 69 rp[_i] = bcol; \ 70 ap[bs2*_i + bs*cidx + ridx] = value; \ 71 A->nonzerostate++;\ 72 a_noinsert:; \ 73 ailen[brow] = nrow; \ 74 } 75 76 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 77 { \ 78 brow = row/bs; \ 79 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 80 rmax = bimax[brow]; nrow = bilen[brow]; \ 81 bcol = col/bs; \ 82 ridx = row % bs; cidx = col % bs; \ 83 low = 0; high = nrow; \ 84 while (high-low > 3) { \ 85 t = (low+high)/2; \ 86 if (rp[t] > bcol) high = t; \ 87 else low = t; \ 88 } \ 89 for (_i=low; _i<high; _i++) { \ 90 if (rp[_i] > bcol) break; \ 91 if (rp[_i] == bcol) { \ 92 bap = ap + bs2*_i + bs*cidx + ridx; \ 93 if (addv == ADD_VALUES) *bap += value; \ 94 else *bap = value; \ 95 goto b_noinsert; \ 96 } \ 97 } \ 98 if (b->nonew == 1) goto b_noinsert; \ 99 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 100 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 101 N = nrow++ - 1; \ 102 /* shift up all the later entries in this row */ \ 103 for (ii=N; ii>=_i; ii--) { \ 104 rp[ii+1] = rp[ii]; \ 105 ierr = 
PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    B->nonzerostate++; \
b_noinsert:; \
    bilen[brow] = nrow; \
  }

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format.
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 176 } 177 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 178 col = in[j] - cstart_orig; /* local col index */ 179 brow = row/bs; bcol = col/bs; 180 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 181 if (roworiented) value = v[i*n+j]; 182 else value = v[i+j*m]; 183 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 184 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 185 } else if (in[j] < 0) continue; 186 #if defined(PETSC_USE_DEBUG) 187 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 188 #endif 189 else { /* off-diag entry (B) */ 190 if (mat->was_assembled) { 191 if (!baij->colmap) { 192 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 193 } 194 #if defined(PETSC_USE_CTABLE) 195 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 196 col = col - 1; 197 #else 198 col = baij->colmap[in[j]/bs] - 1; 199 #endif 200 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 201 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 202 col = in[j]; 203 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 204 B = baij->B; 205 b = (Mat_SeqBAIJ*)(B)->data; 206 bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 207 ba = b->a; 208 } else col += in[j]%bs; 209 } else col = in[j]; 210 if (roworiented) value = v[i*n+j]; 211 else value = v[i+j*m]; 212 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 213 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 214 } 215 } 216 } else { /* off processor entry */ 217 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 218 if (!baij->donotstash) { 219 mat->assembled = PETSC_FALSE; 220 n_loc = 0; 221 for (j=0; j<n; j++) { 222 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 223 in_loc[n_loc] = in[j]; 224 if (roworiented) { 225 v_loc[n_loc] = v[i*n+j]; 226 } else { 227 v_loc[n_loc] = v[j*m+i]; 228 } 229 n_loc++; 230 } 231 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ_Inlined" 240 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 241 { 242 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 243 PetscErrorCode ierr; 244 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 245 PetscInt *imax =a->imax,*ai=a->i,*ailen=a->ilen; 246 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 247 PetscBool roworiented=a->roworiented; 248 const PetscScalar *value = v; 249 MatScalar *ap,*aa = a->a,*bap; 250 251 PetscFunctionBegin; 252 if (col < row) { 253 if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */ 254 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 255 } 256 rp = aj + ai[row]; 257 ap = aa + bs2*ai[row]; 258 rmax = imax[row]; 259 nrow = ailen[row]; 260 value = v; 261 low = 0; 262 high = nrow; 263 264 while (high-low > 7) { 265 t = (low+high)/2; 266 if (rp[t] > col) high = t; 267 else low = t; 268 } 269 for (i=low; i<high; i++) { 270 if (rp[i] > col) break; 271 if (rp[i] == col) { 272 bap = ap + bs2*i; 273 if (roworiented) { 274 if (is == ADD_VALUES) { 275 for (ii=0; ii<bs; ii++) { 276 for (jj=ii; jj<bs2; jj+=bs) { 277 bap[jj] += *value++; 278 } 279 } 280 } else { 281 for (ii=0; ii<bs; ii++) { 282 for (jj=ii; jj<bs2; jj+=bs) { 283 bap[jj] = *value++; 284 } 285 } 286 } 287 } else { 288 if (is == ADD_VALUES) { 289 for (ii=0; ii<bs; ii++) { 290 for (jj=0; jj<bs; jj++) { 291 *bap++ += *value++; 292 } 293 } 294 } else { 295 for (ii=0; ii<bs; ii++) { 296 for (jj=0; jj<bs; jj++) { 297 *bap++ = *value++; 298 } 299 } 300 } 301 } 302 goto noinsert2; 303 } 304 } 305 if (nonew == 1) goto noinsert2; 306 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol); 307 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 308 N = nrow++ - 1; high++; 309 /* shift up all the later entries in this row */ 310 for (ii=N; ii>=i; ii--) { 311 rp[ii+1] = rp[ii]; 312 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 313 } 314 if (N >= i) { 315 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 316 } 317 rp[i] = col; 318 bap = ap + bs2*i; 319 if (roworiented) { 320 for (ii=0; ii<bs; ii++) { 321 for (jj=ii; jj<bs2; jj+=bs) { 322 bap[jj] = *value++; 323 } 324 } 325 } else { 326 for (ii=0; ii<bs; ii++) { 327 for (jj=0; jj<bs; jj++) { 328 *bap++ = *value++; 329 } 330 } 331 } 332 noinsert2:; 333 ailen[row] = nrow; 334 PetscFunctionReturn(0); 335 } 336 337 #undef __FUNCT__ 338 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined" 339 /* 340 This routine is exactly duplicated in mpibaij.c 341 */ 342 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 343 { 344 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 345 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 346 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 347 PetscErrorCode ierr; 348 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 349 PetscBool roworiented=a->roworiented; 350 const PetscScalar *value = v; 351 MatScalar *ap,*aa = a->a,*bap; 352 353 PetscFunctionBegin; 354 rp = aj + ai[row]; 355 ap = aa + bs2*ai[row]; 356 rmax = imax[row]; 357 nrow = ailen[row]; 358 low = 0; 359 high = nrow; 360 value = v; 361 while (high-low > 7) { 362 t = (low+high)/2; 363 if (rp[t] > col) high = t; 364 else low = t; 365 } 366 for (i=low; i<high; i++) { 367 if (rp[i] > col) break; 368 if (rp[i] == col) { 369 bap = ap + bs2*i; 370 if (roworiented) { 371 if (is == ADD_VALUES) { 372 for (ii=0; ii<bs; ii++) { 373 for (jj=ii; jj<bs2; jj+=bs) { 374 bap[jj] += *value++; 375 } 376 } 377 } else { 378 for (ii=0; ii<bs; ii++) { 379 for (jj=ii; jj<bs2; jj+=bs) { 380 bap[jj] = *value++; 381 } 382 } 383 } 384 } else { 385 if (is == ADD_VALUES) { 386 for (ii=0; ii<bs; ii++,value+=bs) { 387 for (jj=0; jj<bs; jj++) { 388 bap[jj] += value[jj]; 389 } 390 bap += bs; 391 } 392 } else { 393 for (ii=0; ii<bs; ii++,value+=bs) { 394 for 
(jj=0; jj<bs; jj++) { 395 bap[jj] = value[jj]; 396 } 397 bap += bs; 398 } 399 } 400 } 401 goto noinsert2; 402 } 403 } 404 if (nonew == 1) goto noinsert2; 405 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol); 406 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 407 N = nrow++ - 1; high++; 408 /* shift up all the later entries in this row */ 409 for (ii=N; ii>=i; ii--) { 410 rp[ii+1] = rp[ii]; 411 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 412 } 413 if (N >= i) { 414 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 415 } 416 rp[i] = col; 417 bap = ap + bs2*i; 418 if (roworiented) { 419 for (ii=0; ii<bs; ii++) { 420 for (jj=ii; jj<bs2; jj+=bs) { 421 bap[jj] = *value++; 422 } 423 } 424 } else { 425 for (ii=0; ii<bs; ii++) { 426 for (jj=0; jj<bs; jj++) { 427 *bap++ = *value++; 428 } 429 } 430 } 431 noinsert2:; 432 ailen[row] = nrow; 433 PetscFunctionReturn(0); 434 } 435 436 #undef __FUNCT__ 437 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 438 /* 439 This routine could be optimized by removing the need for the block copy below and passing stride information 440 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 441 */ 442 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 443 { 444 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 445 const MatScalar *value; 446 MatScalar *barray =baij->barray; 447 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 448 PetscErrorCode ierr; 449 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 450 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 451 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 452 453 PetscFunctionBegin; 454 if (!barray) { 455 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 456 baij->barray = barray; 457 } 458 459 if (roworiented) { 460 stepval = (n-1)*bs; 461 } else { 462 stepval = (m-1)*bs; 463 } 464 for (i=0; i<m; i++) { 465 if (im[i] < 0) continue; 466 #if defined(PETSC_USE_DEBUG) 467 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1); 468 #endif 469 if (im[i] >= rstart && im[i] < rend) { 470 row = im[i] - rstart; 471 for (j=0; j<n; j++) { 472 if (im[i] > in[j]) { 473 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 474 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 475 } 476 /* If NumCol = 1 then a copy is not required */ 477 if ((roworiented) && (n == 1)) { 478 barray = (MatScalar*) v + i*bs2; 479 } else if ((!roworiented) && (m == 1)) { 480 barray = (MatScalar*) v + j*bs2; 481 } else { /* Here a copy is required */ 482 if (roworiented) { 483 value = v + i*(stepval+bs)*bs + j*bs; 484 } else { 485 value = v + j*(stepval+bs)*bs + i*bs; 486 } 487 for (ii=0; ii<bs; ii++,value+=stepval) { 488 for (jj=0; jj<bs; jj++) { 489 *barray++ = *value++; 490 } 491 } 492 barray -=bs2; 493 } 494 495 if (in[j] >= cstart && in[j] < cend) { 496 col = in[j] - cstart; 497 ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 498 } else if (in[j] < 0) continue; 499 #if defined(PETSC_USE_DEBUG) 500 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1); 501 #endif 502 else { 503 if (mat->was_assembled) { 504 if (!baij->colmap) { 505 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 506 } 507 508 #if defined(PETSC_USE_DEBUG) 509 #if defined(PETSC_USE_CTABLE) 510 { PetscInt data; 511 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 512 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 513 } 514 #else 515 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 516 #endif 517 #endif 518 #if defined(PETSC_USE_CTABLE) 519 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 520 col = (col - 1)/bs; 521 #else 522 col = (baij->colmap[in[j]] - 1)/bs; 523 #endif 524 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 525 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 526 col = in[j]; 527 } 528 } else col = in[j]; 529 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 530 } 531 } 532 } else { 533 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 534 if (!baij->donotstash) { 535 if (roworiented) { 536 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 537 } else { 538 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 539 } 540 } 541 } 542 } 543 PetscFunctionReturn(0); 544 } 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatGetValues_MPISBAIJ" 548 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 549 { 550 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 551 PetscErrorCode ierr; 552 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 553 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 554 555 PetscFunctionBegin; 556 for (i=0; i<m; i++) { 557 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 558 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 559 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 560 row = idxm[i] - bsrstart; 561 for (j=0; j<n; j++) { 562 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 563 if 
(idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 564 if (idxn[j] >= bscstart && idxn[j] < bscend) { 565 col = idxn[j] - bscstart; 566 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 567 } else { 568 if (!baij->colmap) { 569 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 570 } 571 #if defined(PETSC_USE_CTABLE) 572 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 573 data--; 574 #else 575 data = baij->colmap[idxn[j]/bs]-1; 576 #endif 577 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 578 else { 579 col = data + idxn[j]%bs; 580 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 581 } 582 } 583 } 584 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 585 } 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatNorm_MPISBAIJ" 591 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 592 { 593 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 594 PetscErrorCode ierr; 595 PetscReal sum[2],*lnorm2; 596 597 PetscFunctionBegin; 598 if (baij->size == 1) { 599 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 600 } else { 601 if (type == NORM_FROBENIUS) { 602 ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr); 603 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 604 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 605 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 606 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 607 ierr = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 608 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 609 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 610 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 611 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 612 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 613 PetscReal *rsum,*rsum2,vabs; 614 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 615 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 616 MatScalar *v; 617 618 ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr); 619 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 620 /* Amat */ 621 v = amat->a; jj = amat->j; 622 for (brow=0; brow<mbs; brow++) { 623 grow = bs*(rstart + brow); 624 nz = amat->i[brow+1] - amat->i[brow]; 625 for (bcol=0; bcol<nz; bcol++) { 626 gcol = bs*(rstart + *jj); jj++; 627 for (col=0; col<bs; col++) { 628 for (row=0; row<bs; row++) { 629 vabs = PetscAbsScalar(*v); v++; 630 rsum[gcol+col] += vabs; 631 /* non-diagonal block */ 632 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 633 } 634 } 635 } 636 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 637 } 638 /* Bmat */ 639 v = bmat->a; jj = bmat->j; 640 for (brow=0; brow<mbs; brow++) { 641 grow = bs*(rstart + brow); 642 nz = bmat->i[brow+1] - bmat->i[brow]; 643 for (bcol=0; bcol<nz; bcol++) { 644 gcol = bs*garray[*jj]; jj++; 645 for (col=0; col<bs; col++) { 646 for (row=0; row<bs; row++) { 647 vabs = PetscAbsScalar(*v); v++; 648 rsum[gcol+col] += vabs; 649 rsum[grow+row] += vabs; 650 } 651 } 652 } 653 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 654 } 655 ierr = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 656 *norm = 0.0; 657 for (col=0; col<mat->cmap->N; col++) { 658 if (rsum2[col] > 
*norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;
  MatScalar      *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash.
Since the values are stashed column-oriented, 721 set the roworiented flag to column oriented, and after MatSetValues() 722 restore the original flags */ 723 r1 = baij->roworiented; 724 r2 = a->roworiented; 725 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 726 727 baij->roworiented = PETSC_FALSE; 728 a->roworiented = PETSC_FALSE; 729 730 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 731 while (1) { 732 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 733 if (!flg) break; 734 735 for (i=0; i<n;) { 736 /* Now identify the consecutive vals belonging to the same row */ 737 for (j=i,rstart=row[j]; j<n; j++) { 738 if (row[j] != rstart) break; 739 } 740 if (j < n) ncols = j-i; 741 else ncols = n-i; 742 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr); 743 i = j; 744 } 745 } 746 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 747 748 baij->roworiented = r1; 749 a->roworiented = r2; 750 751 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 752 } 753 754 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 755 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 756 757 /* determine if any processor has disassembled, if so we must 758 also disassemble ourselfs, in order that we may reassemble. */ 759 /* 760 if nonzero structure of submatrix B cannot change then we know that 761 no processor disassembled thus we can skip this stuff 762 */ 763 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 764 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 765 if (mat->was_assembled && !other_disassembled) { 766 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 767 } 768 } 769 770 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 771 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 772 } 773 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 774 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 775 776 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 777 778 baij->rowvalues = 0; 779 780 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 781 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 782 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 783 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 784 } 785 PetscFunctionReturn(0); 786 } 787 788 extern PetscErrorCode MatView_SeqSBAIJ(Mat,PetscViewer); 789 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 790 #include <petscdraw.h> 791 #undef __FUNCT__ 792 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 793 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 794 { 795 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 796 PetscErrorCode ierr; 797 PetscInt bs = mat->rmap->bs; 798 PetscMPIInt rank = baij->rank; 799 PetscBool iascii,isdraw; 800 PetscViewer sviewer; 801 PetscViewerFormat format; 802 803 PetscFunctionBegin; 804 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 805 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 806 if (iascii) { 807 ierr = 
PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 808 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 809 MatInfo info; 810 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 811 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 812 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 813 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 814 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 815 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 816 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 817 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 818 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 819 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 820 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 821 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 822 PetscFunctionReturn(0); 823 } else if (format == PETSC_VIEWER_ASCII_INFO) { 824 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 825 PetscFunctionReturn(0); 826 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 827 PetscFunctionReturn(0); 828 } 829 } 830 831 if (isdraw) { 832 PetscDraw draw; 833 PetscBool isnull; 834 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 835 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 836 if (isnull) PetscFunctionReturn(0); 837 } 838 839 { 840 /* assemble the entire matrix onto first processor. */ 841 Mat A; 842 Mat_SeqSBAIJ *Aloc; 843 Mat_SeqBAIJ *Bloc; 844 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 845 MatScalar *a; 846 const char *matname; 847 848 /* Should this be the same type as mat? 
*/ 849 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 850 if (!rank) { 851 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 852 } else { 853 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 854 } 855 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 856 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 857 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 858 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 859 860 /* copy over the A part */ 861 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 862 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 863 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 864 865 for (i=0; i<mbs; i++) { 866 rvals[0] = bs*(baij->rstartbs + i); 867 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 868 for (j=ai[i]; j<ai[i+1]; j++) { 869 col = (baij->cstartbs+aj[j])*bs; 870 for (k=0; k<bs; k++) { 871 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 872 col++; 873 a += bs; 874 } 875 } 876 } 877 /* copy over the B part */ 878 Bloc = (Mat_SeqBAIJ*)baij->B->data; 879 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 880 for (i=0; i<mbs; i++) { 881 882 rvals[0] = bs*(baij->rstartbs + i); 883 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 884 for (j=ai[i]; j<ai[i+1]; j++) { 885 col = baij->garray[aj[j]]*bs; 886 for (k=0; k<bs; k++) { 887 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 888 col++; 889 a += bs; 890 } 891 } 892 } 893 ierr = PetscFree(rvals);CHKERRQ(ierr); 894 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 895 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 896 /* 897 Everyone has to call to draw the matrix since the graphics waits are 898 synchronized across all processors that share the PetscDraw object 899 */ 900 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 901 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 902 if (!rank) { 903 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 904 ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 905 } 906 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 907 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 908 ierr = MatDestroy(&A);CHKERRQ(ierr); 909 } 910 PetscFunctionReturn(0); 911 } 912 913 #undef __FUNCT__ 914 #define __FUNCT__ "MatView_MPISBAIJ_Binary" 915 static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer) 916 { 917 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)mat->data; 918 Mat_SeqSBAIJ *A = (Mat_SeqSBAIJ*)a->A->data; 919 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data; 920 PetscErrorCode ierr; 921 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 922 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 923 int fd; 924 PetscScalar *column_values; 925 FILE *file; 926 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 927 PetscInt message_count,flowcontrolcount; 928 929 PetscFunctionBegin; 930 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 931 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 932 nz = bs2*(A->nz + B->nz); 933 rlen = mat->rmap->n; 934 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 935 if (!rank) { 936 header[0] = MAT_FILE_CLASSID; 937 header[1] = mat->rmap->N; 938 header[2] = mat->cmap->N; 939 940 ierr = 
MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 941 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 942 /* get largest number of rows any processor has */ 943 range = mat->rmap->range; 944 for (i=1; i<size; i++) { 945 rlen = PetscMax(rlen,range[i+1] - range[i]); 946 } 947 } else { 948 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 949 } 950 951 ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr); 952 /* compute lengths of each row */ 953 for (i=0; i<a->mbs; i++) { 954 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 955 } 956 /* store the row lengths to the file */ 957 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 958 if (!rank) { 959 MPI_Status status; 960 ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr); 961 rlen = (range[1] - range[0])/bs; 962 for (i=0; i<rlen; i++) { 963 for (j=0; j<bs; j++) { 964 row_lens[i*bs+j] = bs*crow_lens[i]; 965 } 966 } 967 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 968 for (i=1; i<size; i++) { 969 rlen = (range[i+1] - range[i])/bs; 970 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 971 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 972 for (k=0; k<rlen; k++) { 973 for (j=0; j<bs; j++) { 974 row_lens[k*bs+j] = bs*crow_lens[k]; 975 } 976 } 977 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 978 } 979 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 980 ierr = PetscFree(row_lens);CHKERRQ(ierr); 981 } else { 982 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 983 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 984 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 985 } 986 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 987 988 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 989 information needed to make it for each row from a block row. 
This does require more communication but still not more than 990 the communication needed for the nonzero values */ 991 nzmax = nz; /* space a largest processor needs */ 992 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 993 ierr = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr); 994 cnt = 0; 995 for (i=0; i<a->mbs; i++) { 996 pcnt = cnt; 997 for (j=B->i[i]; j<B->i[i+1]; j++) { 998 if ((col = garray[B->j[j]]) > cstart) break; 999 for (l=0; l<bs; l++) { 1000 column_indices[cnt++] = bs*col+l; 1001 } 1002 } 1003 for (k=A->i[i]; k<A->i[i+1]; k++) { 1004 for (l=0; l<bs; l++) { 1005 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1006 } 1007 } 1008 for (; j<B->i[i+1]; j++) { 1009 for (l=0; l<bs; l++) { 1010 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1011 } 1012 } 1013 len = cnt - pcnt; 1014 for (k=1; k<bs; k++) { 1015 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1016 cnt += len; 1017 } 1018 } 1019 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1020 1021 /* store the columns to the file */ 1022 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1023 if (!rank) { 1024 MPI_Status status; 1025 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1026 for (i=1; i<size; i++) { 1027 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1028 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1029 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1030 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1031 } 1032 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1033 } else { 1034 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1035 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1036 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1037 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1038 } 1039 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1040 1041 /* load up the numerical values */ 1042 ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr); 1043 cnt = 0; 1044 for (i=0; i<a->mbs; i++) { 1045 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1046 for (j=B->i[i]; j<B->i[i+1]; j++) { 1047 if (garray[B->j[j]] > cstart) break; 1048 for (l=0; l<bs; l++) { 1049 for (ll=0; ll<bs; ll++) { 1050 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1051 } 1052 } 1053 cnt += bs; 1054 } 1055 for (k=A->i[i]; k<A->i[i+1]; k++) { 1056 for (l=0; l<bs; l++) { 1057 for (ll=0; ll<bs; ll++) { 1058 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1059 } 1060 } 1061 cnt += bs; 1062 } 1063 for (; j<B->i[i+1]; j++) { 1064 for (l=0; l<bs; l++) { 1065 for (ll=0; ll<bs; ll++) { 1066 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1067 } 1068 } 1069 cnt += bs; 1070 } 1071 cnt += (bs-1)*rlen; 1072 } 1073 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1074 1075 /* store the column values to the file */ 1076 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1077 if (!rank) { 1078 MPI_Status status; 1079 ierr = 
PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1080 for (i=1; i<size; i++) { 1081 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1082 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1083 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1084 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1085 } 1086 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1087 } else { 1088 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1089 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1090 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1091 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1092 } 1093 ierr = PetscFree(column_values);CHKERRQ(ierr); 1094 1095 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1096 if (file) { 1097 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1098 } 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatView_MPISBAIJ" 1104 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 1105 { 1106 PetscErrorCode ierr; 1107 PetscBool iascii,isdraw,issocket,isbinary; 1108 1109 PetscFunctionBegin; 1110 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1111 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1112 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1113 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1114 if (iascii || isdraw || issocket) { 1115 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1116 } else if (isbinary) { 1117 ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1118 } 1119 PetscFunctionReturn(0); 1120 } 1121 1122 #undef __FUNCT__ 1123 #define __FUNCT__ "MatDestroy_MPISBAIJ" 1124 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 1125 { 1126 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1127 PetscErrorCode ierr; 1128 1129 PetscFunctionBegin; 1130 #if defined(PETSC_USE_LOG) 1131 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1132 #endif 1133 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1134 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1135 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1136 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1137 #if defined(PETSC_USE_CTABLE) 1138 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1139 #else 1140 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1141 #endif 1142 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1143 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1144 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1145 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 1146 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 1147 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 1148 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 1149 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 1150 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 1151 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1152 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1153 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 
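  /* Note: slvec0/slvec1 and the sMvctx scatter released above support the single-scatter
     symmetric multiply (see MatMult_MPISBAIJ() later in this file).  The vectors diag, bb1
     and xx1 released next are work vectors that are only created on demand (e.g. by the
     relaxation kernels), so they may still be NULL here; VecDestroy() accepts a NULL Vec. */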
1154 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 1155 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 1156 ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 1157 #if defined(PETSC_USE_REAL_MAT_SINGLE) 1158 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 1159 #endif 1160 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 1161 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 1162 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1163 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1164 1165 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1166 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1167 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1168 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1169 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr); 1170 #if defined(PETSC_HAVE_ELEMENTAL) 1171 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr); 1172 #endif 1173 PetscFunctionReturn(0); 1174 } 1175 1176 #undef __FUNCT__ 1177 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian" 1178 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 1179 { 1180 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1181 PetscErrorCode ierr; 1182 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 1183 PetscScalar *from; 1184 const PetscScalar *x; 1185 1186 PetscFunctionBegin; 1187 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1188 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1189 1190 /* diagonal part */ 1191 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1192 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1193 1194 /* subdiagonal part */ 1195 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1196 1197 /* copy x into the vec slvec0 */ 1198 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1199 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1200 1201 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1202 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1203 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1204 1205 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1206 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1207 /* supperdiagonal part */ 1208 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1209 PetscFunctionReturn(0); 1210 } 1211 1212 #undef __FUNCT__ 1213 #define __FUNCT__ "MatMult_MPISBAIJ" 1214 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 1215 { 1216 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1217 PetscErrorCode ierr; 1218 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 1219 PetscScalar *from; 1220 const PetscScalar *x; 1221 1222 PetscFunctionBegin; 1223 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1224 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1225 1226 /* diagonal part */ 1227 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1228 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1229 1230 /* subdiagonal part */ 1231 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1232 1233 /* copy x into the vec slvec0 */ 1234 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1235 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1236 1237 ierr = 
PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from,zero=0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

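  /* "2comm" variant: z = y + A*x is formed with the general (BAIJ-style) ghost scatter Mvctx
     rather than the slvec/sMvctx single-scatter scheme used above.  The steps below are:
       1) scatter x into the ghost vector lvec,
       2) z  = A_diag*x + y          (local diagonal block),
       3) z += B*lvec                (upper-triangular off-diagonal blocks),
       4) lvec = B^T*x, then a reverse ADD scatter folds lvec back into z
                                     (contribution of the lower-triangular blocks). */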
PetscFunctionBegin; 1321 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1322 /* do diagonal part */ 1323 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1324 /* do supperdiagonal part */ 1325 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1326 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1327 1328 /* do subdiagonal part */ 1329 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1330 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1331 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1332 PetscFunctionReturn(0); 1333 } 1334 1335 /* 1336 This only works correctly for square matrices where the subblock A->A is the 1337 diagonal block 1338 */ 1339 #undef __FUNCT__ 1340 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 1341 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1342 { 1343 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1344 PetscErrorCode ierr; 1345 1346 PetscFunctionBegin; 1347 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1348 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1349 PetscFunctionReturn(0); 1350 } 1351 1352 #undef __FUNCT__ 1353 #define __FUNCT__ "MatScale_MPISBAIJ" 1354 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1355 { 1356 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1357 PetscErrorCode ierr; 1358 1359 PetscFunctionBegin; 1360 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1361 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1362 PetscFunctionReturn(0); 1363 } 1364 1365 #undef __FUNCT__ 1366 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1367 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1368 { 1369 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1370 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1371 PetscErrorCode ierr; 1372 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1373 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1374 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1375 1376 PetscFunctionBegin; 1377 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1378 mat->getrowactive = PETSC_TRUE; 1379 1380 if (!mat->rowvalues && (idx || v)) { 1381 /* 1382 allocate enough space to hold information from the longest row. 
1383 */ 1384 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1385 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1386 PetscInt max = 1,mbs = mat->mbs,tmp; 1387 for (i=0; i<mbs; i++) { 1388 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1389 if (max < tmp) max = tmp; 1390 } 1391 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1392 } 1393 1394 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1395 lrow = row - brstart; /* local row index */ 1396 1397 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1398 if (!v) {pvA = 0; pvB = 0;} 1399 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1400 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1401 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1402 nztot = nzA + nzB; 1403 1404 cmap = mat->garray; 1405 if (v || idx) { 1406 if (nztot) { 1407 /* Sort by increasing column numbers, assuming A and B already sorted */ 1408 PetscInt imark = -1; 1409 if (v) { 1410 *v = v_p = mat->rowvalues; 1411 for (i=0; i<nzB; i++) { 1412 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1413 else break; 1414 } 1415 imark = i; 1416 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1417 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1418 } 1419 if (idx) { 1420 *idx = idx_p = mat->rowindices; 1421 if (imark > -1) { 1422 for (i=0; i<imark; i++) { 1423 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1424 } 1425 } else { 1426 for (i=0; i<nzB; i++) { 1427 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1428 else break; 1429 } 1430 imark = i; 1431 } 1432 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1433 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1434 } 1435 } else { 1436 if (idx) *idx = 0; 1437 if (v) *v = 0; 1438 } 1439 } 1440 *nz = nztot; 1441 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1442 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1443 PetscFunctionReturn(0); 1444 } 1445 1446 #undef __FUNCT__ 1447 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1448 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1449 { 1450 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1451 1452 PetscFunctionBegin; 1453 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1454 baij->getrowactive = PETSC_FALSE; 1455 PetscFunctionReturn(0); 1456 } 1457 1458 #undef __FUNCT__ 1459 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1460 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1461 { 1462 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1463 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1464 1465 PetscFunctionBegin; 1466 aA->getrow_utriangular = PETSC_TRUE; 1467 PetscFunctionReturn(0); 1468 } 1469 #undef __FUNCT__ 1470 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1471 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1472 { 1473 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1474 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1475 1476 PetscFunctionBegin; 1477 aA->getrow_utriangular = PETSC_FALSE; 1478 PetscFunctionReturn(0); 1479 } 1480 1481 #undef __FUNCT__ 1482 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1483 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1484 { 1485 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1486 PetscErrorCode ierr; 1487 1488 PetscFunctionBegin; 1489 ierr = 
MatRealPart(a->A);CHKERRQ(ierr); 1490 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1491 PetscFunctionReturn(0); 1492 } 1493 1494 #undef __FUNCT__ 1495 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1496 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1497 { 1498 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1499 PetscErrorCode ierr; 1500 1501 PetscFunctionBegin; 1502 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1503 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1504 PetscFunctionReturn(0); 1505 } 1506 1507 /* Check if isrow is a subset of iscol_local, called by MatGetSubMatrix_MPISBAIJ() 1508 Input: isrow - distributed(parallel), 1509 iscol_local - locally owned (seq) 1510 */ 1511 #undef __FUNCT__ 1512 #define __FUNCT__ "ISEqual_private" 1513 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg) 1514 { 1515 PetscErrorCode ierr; 1516 PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch; 1517 const PetscInt *ptr1,*ptr2; 1518 1519 PetscFunctionBegin; 1520 ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr); 1521 ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr); 1522 if (sz1 > sz2) { 1523 *flg = PETSC_FALSE; 1524 PetscFunctionReturn(0); 1525 } 1526 1527 ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr); 1528 ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1529 1530 ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr); 1531 ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr); 1532 ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr); 1533 ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr); 1534 ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr); 1535 ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr); 1536 1537 nmatch=0; 1538 k = 0; 1539 for (i=0; i<sz1; i++){ 1540 for (j=k; j<sz2; j++){ 1541 if (a1[i] == a2[j]) { 1542 k = j; nmatch++; 1543 break; 1544 } 1545 } 1546 } 1547 ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr); 1548 ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1549 ierr = PetscFree(a1);CHKERRQ(ierr); 1550 ierr = PetscFree(a2);CHKERRQ(ierr); 1551 if (nmatch < sz1) { 1552 *flg = PETSC_FALSE; 1553 } else { 1554 *flg = PETSC_TRUE; 1555 } 1556 PetscFunctionReturn(0); 1557 } 1558 1559 #undef __FUNCT__ 1560 #define __FUNCT__ "MatGetSubMatrix_MPISBAIJ" 1561 PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1562 { 1563 PetscErrorCode ierr; 1564 IS iscol_local; 1565 PetscInt csize; 1566 PetscBool isequal; 1567 1568 PetscFunctionBegin; 1569 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1570 if (call == MAT_REUSE_MATRIX) { 1571 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1572 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1573 } else { 1574 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1575 ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr); 1576 if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow"); 1577 } 1578 1579 /* now call MatGetSubMatrix_MPIBAIJ() */ 1580 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1581 if (call == MAT_INITIAL_MATRIX) { 1582 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1583 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1584 } 1585 PetscFunctionReturn(0); 1586 } 1587 1588 #undef __FUNCT__ 1589 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1590 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1591 { 1592 Mat_MPISBAIJ *l = 
(Mat_MPISBAIJ*)A->data; 1593 PetscErrorCode ierr; 1594 1595 PetscFunctionBegin; 1596 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1597 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1598 PetscFunctionReturn(0); 1599 } 1600 1601 #undef __FUNCT__ 1602 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1603 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1604 { 1605 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1606 Mat A = a->A,B = a->B; 1607 PetscErrorCode ierr; 1608 PetscReal isend[5],irecv[5]; 1609 1610 PetscFunctionBegin; 1611 info->block_size = (PetscReal)matin->rmap->bs; 1612 1613 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1614 1615 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1616 isend[3] = info->memory; isend[4] = info->mallocs; 1617 1618 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1619 1620 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1621 isend[3] += info->memory; isend[4] += info->mallocs; 1622 if (flag == MAT_LOCAL) { 1623 info->nz_used = isend[0]; 1624 info->nz_allocated = isend[1]; 1625 info->nz_unneeded = isend[2]; 1626 info->memory = isend[3]; 1627 info->mallocs = isend[4]; 1628 } else if (flag == MAT_GLOBAL_MAX) { 1629 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1630 1631 info->nz_used = irecv[0]; 1632 info->nz_allocated = irecv[1]; 1633 info->nz_unneeded = irecv[2]; 1634 info->memory = irecv[3]; 1635 info->mallocs = irecv[4]; 1636 } else if (flag == MAT_GLOBAL_SUM) { 1637 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1638 1639 info->nz_used = irecv[0]; 1640 info->nz_allocated = irecv[1]; 1641 info->nz_unneeded = irecv[2]; 1642 info->memory = irecv[3]; 1643 info->mallocs = irecv[4]; 1644 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1645 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1646 info->fill_ratio_needed = 0; 1647 info->factor_mallocs = 0; 1648 PetscFunctionReturn(0); 1649 } 1650 1651 #undef __FUNCT__ 1652 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1653 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1654 { 1655 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1656 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1657 PetscErrorCode ierr; 1658 1659 PetscFunctionBegin; 1660 switch (op) { 1661 case MAT_NEW_NONZERO_LOCATIONS: 1662 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1663 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1664 case MAT_KEEP_NONZERO_PATTERN: 1665 case MAT_NEW_NONZERO_LOCATION_ERR: 1666 MatCheckPreallocated(A,1); 1667 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1668 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1669 break; 1670 case MAT_ROW_ORIENTED: 1671 MatCheckPreallocated(A,1); 1672 a->roworiented = flg; 1673 1674 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1675 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1676 break; 1677 case MAT_NEW_DIAGONALS: 1678 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1679 break; 1680 case MAT_IGNORE_OFF_PROC_ENTRIES: 1681 a->donotstash = flg; 1682 break; 1683 case MAT_USE_HASH_TABLE: 1684 a->ht_flag = flg; 1685 break; 1686 case MAT_HERMITIAN: 1687 MatCheckPreallocated(A,1); 1688 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1689 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1690 1691 A->ops->mult = 
MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* an SBAIJ matrix is symmetric, so its transpose is simply a copy of the matrix */
  if (reuse == MAT_INITIAL_MATRIX || *B != A) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a = baij->A, b = baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscBool      flg;

  PetscFunctionBegin;
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D %D must be the same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv != n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPISBAIJ"
PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat 
B,PetscBool *flag) 1797 { 1798 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1799 Mat a,b,c,d; 1800 PetscBool flg; 1801 PetscErrorCode ierr; 1802 1803 PetscFunctionBegin; 1804 a = matA->A; b = matA->B; 1805 c = matB->A; d = matB->B; 1806 1807 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1808 if (flg) { 1809 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1810 } 1811 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1812 PetscFunctionReturn(0); 1813 } 1814 1815 #undef __FUNCT__ 1816 #define __FUNCT__ "MatCopy_MPISBAIJ" 1817 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1818 { 1819 PetscErrorCode ierr; 1820 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1821 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1822 1823 PetscFunctionBegin; 1824 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1825 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1826 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1827 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1828 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1829 } else { 1830 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1831 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1832 } 1833 PetscFunctionReturn(0); 1834 } 1835 1836 #undef __FUNCT__ 1837 #define __FUNCT__ "MatSetUp_MPISBAIJ" 1838 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1839 { 1840 PetscErrorCode ierr; 1841 1842 PetscFunctionBegin; 1843 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1844 PetscFunctionReturn(0); 1845 } 1846 1847 #undef __FUNCT__ 1848 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1849 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1850 { 1851 PetscErrorCode ierr; 1852 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1853 PetscBLASInt bnz,one=1; 1854 Mat_SeqSBAIJ *xa,*ya; 1855 Mat_SeqBAIJ *xb,*yb; 1856 1857 PetscFunctionBegin; 1858 if (str == SAME_NONZERO_PATTERN) { 1859 PetscScalar alpha = a; 1860 xa = (Mat_SeqSBAIJ*)xx->A->data; 1861 ya = (Mat_SeqSBAIJ*)yy->A->data; 1862 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1863 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1864 xb = (Mat_SeqBAIJ*)xx->B->data; 1865 yb = (Mat_SeqBAIJ*)yy->B->data; 1866 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1867 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1868 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1869 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1870 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1871 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1872 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 1873 } else { 1874 Mat B; 1875 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1876 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1877 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1878 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 1879 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1880 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1881 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1882 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1883 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1884 ierr = 
MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1885 ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 1886 ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1887 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1888 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1889 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1890 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 1891 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1892 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1893 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1894 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1895 } 1896 PetscFunctionReturn(0); 1897 } 1898 1899 #undef __FUNCT__ 1900 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1901 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1902 { 1903 PetscErrorCode ierr; 1904 PetscInt i; 1905 PetscBool flg; 1906 1907 PetscFunctionBegin; 1908 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */ 1909 for (i=0; i<n; i++) { 1910 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1911 if (!flg) { 1912 ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr); 1913 } 1914 } 1915 PetscFunctionReturn(0); 1916 } 1917 1918 #undef __FUNCT__ 1919 #define __FUNCT__ "MatShift_MPISBAIJ" 1920 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a) 1921 { 1922 PetscErrorCode ierr; 1923 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data; 1924 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data; 1925 1926 PetscFunctionBegin; 1927 if (!Y->preallocated) { 1928 ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 1929 } else if (!aij->nz) { 1930 PetscInt nonew = aij->nonew; 1931 ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 1932 aij->nonew = nonew; 1933 } 1934 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1935 PetscFunctionReturn(0); 1936 } 1937 1938 #undef __FUNCT__ 1939 #define __FUNCT__ "MatMissingDiagonal_MPISBAIJ" 1940 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1941 { 1942 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1943 PetscErrorCode ierr; 1944 1945 PetscFunctionBegin; 1946 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 1947 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 1948 if (d) { 1949 PetscInt rstart; 1950 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 1951 *d += rstart/A->rmap->bs; 1952 1953 } 1954 PetscFunctionReturn(0); 1955 } 1956 1957 #undef __FUNCT__ 1958 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1959 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1960 { 1961 PetscFunctionBegin; 1962 *a = ((Mat_MPISBAIJ*)A->data)->A; 1963 PetscFunctionReturn(0); 1964 } 1965 1966 /* -------------------------------------------------------------------*/ 1967 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1968 MatGetRow_MPISBAIJ, 1969 MatRestoreRow_MPISBAIJ, 1970 MatMult_MPISBAIJ, 1971 /* 4*/ MatMultAdd_MPISBAIJ, 1972 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1973 MatMultAdd_MPISBAIJ, 1974 0, 1975 0, 1976 0, 1977 /* 10*/ 0, 1978 0, 1979 0, 1980 MatSOR_MPISBAIJ, 1981 MatTranspose_MPISBAIJ, 1982 /* 15*/ MatGetInfo_MPISBAIJ, 1983 MatEqual_MPISBAIJ, 1984 MatGetDiagonal_MPISBAIJ, 1985 MatDiagonalScale_MPISBAIJ, 1986 MatNorm_MPISBAIJ, 1987 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1988 
MatAssemblyEnd_MPISBAIJ, 1989 MatSetOption_MPISBAIJ, 1990 MatZeroEntries_MPISBAIJ, 1991 /* 24*/ 0, 1992 0, 1993 0, 1994 0, 1995 0, 1996 /* 29*/ MatSetUp_MPISBAIJ, 1997 0, 1998 0, 1999 MatGetDiagonalBlock_MPISBAIJ, 2000 0, 2001 /* 34*/ MatDuplicate_MPISBAIJ, 2002 0, 2003 0, 2004 0, 2005 0, 2006 /* 39*/ MatAXPY_MPISBAIJ, 2007 MatGetSubMatrices_MPISBAIJ, 2008 MatIncreaseOverlap_MPISBAIJ, 2009 MatGetValues_MPISBAIJ, 2010 MatCopy_MPISBAIJ, 2011 /* 44*/ 0, 2012 MatScale_MPISBAIJ, 2013 MatShift_MPISBAIJ, 2014 0, 2015 0, 2016 /* 49*/ 0, 2017 0, 2018 0, 2019 0, 2020 0, 2021 /* 54*/ 0, 2022 0, 2023 MatSetUnfactored_MPISBAIJ, 2024 0, 2025 MatSetValuesBlocked_MPISBAIJ, 2026 /* 59*/ MatGetSubMatrix_MPISBAIJ, 2027 0, 2028 0, 2029 0, 2030 0, 2031 /* 64*/ 0, 2032 0, 2033 0, 2034 0, 2035 0, 2036 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 2037 0, 2038 0, 2039 0, 2040 0, 2041 /* 74*/ 0, 2042 0, 2043 0, 2044 0, 2045 0, 2046 /* 79*/ 0, 2047 0, 2048 0, 2049 0, 2050 MatLoad_MPISBAIJ, 2051 /* 84*/ 0, 2052 0, 2053 0, 2054 0, 2055 0, 2056 /* 89*/ 0, 2057 0, 2058 0, 2059 0, 2060 0, 2061 /* 94*/ 0, 2062 0, 2063 0, 2064 0, 2065 0, 2066 /* 99*/ 0, 2067 0, 2068 0, 2069 0, 2070 0, 2071 /*104*/ 0, 2072 MatRealPart_MPISBAIJ, 2073 MatImaginaryPart_MPISBAIJ, 2074 MatGetRowUpperTriangular_MPISBAIJ, 2075 MatRestoreRowUpperTriangular_MPISBAIJ, 2076 /*109*/ 0, 2077 0, 2078 0, 2079 0, 2080 MatMissingDiagonal_MPISBAIJ, 2081 /*114*/ 0, 2082 0, 2083 0, 2084 0, 2085 0, 2086 /*119*/ 0, 2087 0, 2088 0, 2089 0, 2090 0, 2091 /*124*/ 0, 2092 0, 2093 0, 2094 0, 2095 0, 2096 /*129*/ 0, 2097 0, 2098 0, 2099 0, 2100 0, 2101 /*134*/ 0, 2102 0, 2103 0, 2104 0, 2105 0, 2106 /*139*/ 0, 2107 0, 2108 0, 2109 0, 2110 0, 2111 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 2112 }; 2113 2114 #undef __FUNCT__ 2115 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 2116 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2117 { 2118 Mat_MPISBAIJ *b; 2119 PetscErrorCode ierr; 2120 PetscInt i,mbs,Mbs; 2121 2122 PetscFunctionBegin; 2123 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2124 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2125 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2126 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2127 2128 b = (Mat_MPISBAIJ*)B->data; 2129 mbs = B->rmap->n/bs; 2130 Mbs = B->rmap->N/bs; 2131 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 2132 2133 B->rmap->bs = bs; 2134 b->bs2 = bs*bs; 2135 b->mbs = mbs; 2136 b->Mbs = Mbs; 2137 b->nbs = B->cmap->n/bs; 2138 b->Nbs = B->cmap->N/bs; 2139 2140 for (i=0; i<=b->size; i++) { 2141 b->rangebs[i] = B->rmap->range[i]/bs; 2142 } 2143 b->rstartbs = B->rmap->rstart/bs; 2144 b->rendbs = B->rmap->rend/bs; 2145 2146 b->cstartbs = B->cmap->rstart/bs; 2147 b->cendbs = B->cmap->rend/bs; 2148 2149 #if defined(PETSC_USE_CTABLE) 2150 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2151 #else 2152 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2153 #endif 2154 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2155 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2156 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2157 ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr); 2158 ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr); 2159 ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr); 2160 ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr); 2161 ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr); 2162 ierr = 
VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr); 2163 2164 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2165 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2166 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2167 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2168 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2169 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2170 2171 if (!B->preallocated) { 2172 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2173 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2174 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 2175 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2176 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2177 } 2178 2179 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2180 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2181 2182 B->preallocated = PETSC_TRUE; 2183 B->was_assembled = PETSC_FALSE; 2184 B->assembled = PETSC_FALSE; 2185 PetscFunctionReturn(0); 2186 } 2187 2188 #undef __FUNCT__ 2189 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ" 2190 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2191 { 2192 PetscInt m,rstart,cstart,cend; 2193 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2194 const PetscInt *JJ =0; 2195 PetscScalar *values=0; 2196 PetscErrorCode ierr; 2197 2198 PetscFunctionBegin; 2199 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2200 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2201 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2202 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2203 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2204 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2205 m = B->rmap->n/bs; 2206 rstart = B->rmap->rstart/bs; 2207 cstart = B->cmap->rstart/bs; 2208 cend = B->cmap->rend/bs; 2209 2210 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2211 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2212 for (i=0; i<m; i++) { 2213 nz = ii[i+1] - ii[i]; 2214 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2215 nz_max = PetscMax(nz_max,nz); 2216 JJ = jj + ii[i]; 2217 for (j=0; j<nz; j++) { 2218 if (*JJ >= cstart) break; 2219 JJ++; 2220 } 2221 d = 0; 2222 for (; j<nz; j++) { 2223 if (*JJ++ >= cend) break; 2224 d++; 2225 } 2226 d_nnz[i] = d; 2227 o_nnz[i] = nz - d; 2228 } 2229 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2230 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2231 2232 values = (PetscScalar*)V; 2233 if (!values) { 2234 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2235 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2236 } 2237 for (i=0; i<m; i++) { 2238 PetscInt row = i + rstart; 2239 PetscInt ncols = ii[i+1] - ii[i]; 2240 const PetscInt *icols = jj + ii[i]; 2241 const PetscScalar *svals = values + (V ? 
(bs*bs*ii[i]) : 0); 2242 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2243 } 2244 2245 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2246 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2247 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2248 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2249 PetscFunctionReturn(0); 2250 } 2251 2252 /*MC 2253 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2254 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2255 the matrix is stored. 2256 2257 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2258 can call MatSetOption(Mat, MAT_HERMITIAN); 2259 2260 Options Database Keys: 2261 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 2262 2263 Level: beginner 2264 2265 .seealso: MatCreateMPISBAIJ 2266 M*/ 2267 2268 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*); 2269 2270 #undef __FUNCT__ 2271 #define __FUNCT__ "MatCreate_MPISBAIJ" 2272 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2273 { 2274 Mat_MPISBAIJ *b; 2275 PetscErrorCode ierr; 2276 PetscBool flg = PETSC_FALSE; 2277 2278 PetscFunctionBegin; 2279 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2280 B->data = (void*)b; 2281 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2282 2283 B->ops->destroy = MatDestroy_MPISBAIJ; 2284 B->ops->view = MatView_MPISBAIJ; 2285 B->assembled = PETSC_FALSE; 2286 B->insertmode = NOT_SET_VALUES; 2287 2288 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2289 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2290 2291 /* build local table of row and column ownerships */ 2292 ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 2293 2294 /* build cache for off array entries formed */ 2295 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2296 2297 b->donotstash = PETSC_FALSE; 2298 b->colmap = NULL; 2299 b->garray = NULL; 2300 b->roworiented = PETSC_TRUE; 2301 2302 /* stuff used in block assembly */ 2303 b->barray = 0; 2304 2305 /* stuff used for matrix vector multiply */ 2306 b->lvec = 0; 2307 b->Mvctx = 0; 2308 b->slvec0 = 0; 2309 b->slvec0b = 0; 2310 b->slvec1 = 0; 2311 b->slvec1a = 0; 2312 b->slvec1b = 0; 2313 b->sMvctx = 0; 2314 2315 /* stuff for MatGetRow() */ 2316 b->rowindices = 0; 2317 b->rowvalues = 0; 2318 b->getrowactive = PETSC_FALSE; 2319 2320 /* hash table stuff */ 2321 b->ht = 0; 2322 b->hd = 0; 2323 b->ht_size = 0; 2324 b->ht_flag = PETSC_FALSE; 2325 b->ht_fact = 0; 2326 b->ht_total_ct = 0; 2327 b->ht_insert_ct = 0; 2328 2329 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 2330 b->ijonly = PETSC_FALSE; 2331 2332 b->in_loc = 0; 2333 b->v_loc = 0; 2334 b->n_loc = 0; 2335 2336 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 2337 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 2338 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 2339 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 2340 #if defined(PETSC_HAVE_ELEMENTAL) 2341 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 2342 #endif 2343 2344 B->symmetric = PETSC_TRUE; 2345 B->structurally_symmetric = PETSC_TRUE; 2346 B->symmetric_set = PETSC_TRUE; 2347 B->structurally_symmetric_set = PETSC_TRUE; 2348 2349 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 2350 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 2351 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 2352 if (flg) { 2353 PetscReal fact = 1.39; 2354 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2355 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 2356 if (fact <= 1.0) fact = 1.39; 2357 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2358 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2359 } 2360 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2361 PetscFunctionReturn(0); 2362 } 2363 2364 /*MC 2365 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2366 2367 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 2368 and MATMPISBAIJ otherwise. 2369 2370 Options Database Keys: 2371 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2372 2373 Level: beginner 2374 2375 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 2376 M*/ 2377 2378 #undef __FUNCT__ 2379 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 2380 /*@C 2381 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2382 the user should preallocate the matrix storage by setting the parameters 2383 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2384 performance can be increased by more than a factor of 50. 2385 2386 Collective on Mat 2387 2388 Input Parameters: 2389 + B - the matrix 2390 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2391 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2392 . d_nz - number of block nonzeros per block row in diagonal portion of local 2393 submatrix (same for all local rows) 2394 . d_nnz - array containing the number of block nonzeros in the various block rows 2395 in the upper triangular and diagonal part of the in diagonal portion of the local 2396 (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 2397 for the diagonal entry and set a value even if it is zero. 2398 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2399 submatrix (same for all local rows). 2400 - o_nnz - array containing the number of nonzeros in the various block rows of the 2401 off-diagonal portion of the local submatrix that is right of the diagonal 2402 (possibly different for each block row) or NULL. 2403 2404 2405 Options Database Keys: 2406 . 
-mat_no_unroll - uses code that does not unroll the loops in the 2407 block calculations (much slower) 2408 . -mat_block_size - size of the blocks to use 2409 2410 Notes: 2411 2412 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2413 than it must be used on all processors that share the object for that argument. 2414 2415 If the *_nnz parameter is given then the *_nz parameter is ignored 2416 2417 Storage Information: 2418 For a square global matrix we define each processor's diagonal portion 2419 to be its local rows and the corresponding columns (a square submatrix); 2420 each processor's off-diagonal portion encompasses the remainder of the 2421 local matrix (a rectangular submatrix). 2422 2423 The user can specify preallocated storage for the diagonal part of 2424 the local submatrix with either d_nz or d_nnz (not both). Set 2425 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2426 memory allocation. Likewise, specify preallocated storage for the 2427 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2428 2429 You can call MatGetInfo() to get information on how effective the preallocation was; 2430 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2431 You can also run with the option -info and look for messages with the string 2432 malloc in them to see if additional memory allocation was needed. 2433 2434 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2435 the figure below we depict these three local rows and all columns (0-11). 2436 2437 .vb 2438 0 1 2 3 4 5 6 7 8 9 10 11 2439 -------------------------- 2440 row 3 |. . . d d d o o o o o o 2441 row 4 |. . . d d d o o o o o o 2442 row 5 |. . . d d d o o o o o o 2443 -------------------------- 2444 .ve 2445 2446 Thus, any entries in the d locations are stored in the d (diagonal) 2447 submatrix, and any entries in the o locations are stored in the 2448 o (off-diagonal) submatrix. Note that the d matrix is stored in 2449 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2450 2451 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2452 plus the diagonal part of the d matrix, 2453 and o_nz should indicate the number of block nonzeros per row in the o matrix 2454 2455 In general, for PDE problems in which most nonzeros are near the diagonal, 2456 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2457 or you will get TERRIBLE performance; see the users' manual chapter on 2458 matrices. 2459 2460 Level: intermediate 2461 2462 .keywords: matrix, block, aij, compressed row, sparse, parallel 2463 2464 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 2465 @*/ 2466 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2467 { 2468 PetscErrorCode ierr; 2469 2470 PetscFunctionBegin; 2471 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2472 PetscValidType(B,1); 2473 PetscValidLogicalCollectiveInt(B,bs,2); 2474 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 2475 PetscFunctionReturn(0); 2476 } 2477 2478 #undef __FUNCT__ 2479 #define __FUNCT__ "MatCreateSBAIJ" 2480 /*@C 2481 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2482 (block compressed row). 
For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the diagonal portion of the local submatrix
           (possibly different for each block row) or NULL.
           If you plan to factor the matrix you must leave room for the diagonal entry and
           set its value even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
.   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqSBAIJ format on one processor)

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   The number of rows and columns must be divisible by blocksize.
   This matrix type does not support complex Hermitian operations.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix). 
2545 2546 The user can specify preallocated storage for the diagonal part of 2547 the local submatrix with either d_nz or d_nnz (not both). Set 2548 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2549 memory allocation. Likewise, specify preallocated storage for the 2550 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2551 2552 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2553 the figure below we depict these three local rows and all columns (0-11). 2554 2555 .vb 2556 0 1 2 3 4 5 6 7 8 9 10 11 2557 -------------------------- 2558 row 3 |. . . d d d o o o o o o 2559 row 4 |. . . d d d o o o o o o 2560 row 5 |. . . d d d o o o o o o 2561 -------------------------- 2562 .ve 2563 2564 Thus, any entries in the d locations are stored in the d (diagonal) 2565 submatrix, and any entries in the o locations are stored in the 2566 o (off-diagonal) submatrix. Note that the d matrix is stored in 2567 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2568 2569 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2570 plus the diagonal part of the d matrix, 2571 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2572 In general, for PDE problems in which most nonzeros are near the diagonal, 2573 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2574 or you will get TERRIBLE performance; see the users' manual chapter on 2575 matrices. 2576 2577 Level: intermediate 2578 2579 .keywords: matrix, block, aij, compressed row, sparse, parallel 2580 2581 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2582 @*/ 2583 2584 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2585 { 2586 PetscErrorCode ierr; 2587 PetscMPIInt size; 2588 2589 PetscFunctionBegin; 2590 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2591 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2592 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2593 if (size > 1) { 2594 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2595 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2596 } else { 2597 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2598 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2599 } 2600 PetscFunctionReturn(0); 2601 } 2602 2603 2604 #undef __FUNCT__ 2605 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2606 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2607 { 2608 Mat mat; 2609 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2610 PetscErrorCode ierr; 2611 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2612 PetscScalar *array; 2613 2614 PetscFunctionBegin; 2615 *newmat = 0; 2616 2617 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2618 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2619 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2620 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2621 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2622 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2623 2624 mat->factortype = matin->factortype; 2625 mat->preallocated = PETSC_TRUE; 2626 mat->assembled = PETSC_TRUE; 2627 mat->insertmode = NOT_SET_VALUES; 2628 2629 a = 
(Mat_MPISBAIJ*)mat->data; 2630 a->bs2 = oldmat->bs2; 2631 a->mbs = oldmat->mbs; 2632 a->nbs = oldmat->nbs; 2633 a->Mbs = oldmat->Mbs; 2634 a->Nbs = oldmat->Nbs; 2635 2636 2637 a->size = oldmat->size; 2638 a->rank = oldmat->rank; 2639 a->donotstash = oldmat->donotstash; 2640 a->roworiented = oldmat->roworiented; 2641 a->rowindices = 0; 2642 a->rowvalues = 0; 2643 a->getrowactive = PETSC_FALSE; 2644 a->barray = 0; 2645 a->rstartbs = oldmat->rstartbs; 2646 a->rendbs = oldmat->rendbs; 2647 a->cstartbs = oldmat->cstartbs; 2648 a->cendbs = oldmat->cendbs; 2649 2650 /* hash table stuff */ 2651 a->ht = 0; 2652 a->hd = 0; 2653 a->ht_size = 0; 2654 a->ht_flag = oldmat->ht_flag; 2655 a->ht_fact = oldmat->ht_fact; 2656 a->ht_total_ct = 0; 2657 a->ht_insert_ct = 0; 2658 2659 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2660 if (oldmat->colmap) { 2661 #if defined(PETSC_USE_CTABLE) 2662 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2663 #else 2664 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 2665 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2666 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2667 #endif 2668 } else a->colmap = 0; 2669 2670 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2671 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2672 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2673 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2674 } else a->garray = 0; 2675 2676 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2677 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2678 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2679 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2680 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2681 2682 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2683 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2684 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2685 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2686 2687 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2688 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2689 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2690 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2691 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2692 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2693 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2694 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2695 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2696 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2697 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 2698 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 2699 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2700 2701 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2702 ierr = 
PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2703 a->sMvctx = oldmat->sMvctx; 2704 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 2705 2706 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2707 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2708 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2709 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2710 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2711 *newmat = mat; 2712 PetscFunctionReturn(0); 2713 } 2714 2715 #undef __FUNCT__ 2716 #define __FUNCT__ "MatLoad_MPISBAIJ" 2717 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2718 { 2719 PetscErrorCode ierr; 2720 PetscInt i,nz,j,rstart,rend; 2721 PetscScalar *vals,*buf; 2722 MPI_Comm comm; 2723 MPI_Status status; 2724 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2725 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2726 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2727 PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows; 2728 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2729 PetscInt dcount,kmax,k,nzcount,tmp; 2730 int fd; 2731 2732 PetscFunctionBegin; 2733 /* force binary viewer to load .info file if it has not yet done so */ 2734 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2735 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2736 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2737 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2738 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2739 if (bs < 0) bs = 1; 2740 2741 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2742 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2743 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2744 if (!rank) { 2745 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2746 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2747 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2748 } 2749 2750 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2751 M = header[1]; 2752 N = header[2]; 2753 2754 /* If global sizes are set, check if they are consistent with that given in the file */ 2755 if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M); 2756 if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N); 2757 2758 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2759 2760 /* 2761 This code adds extra rows to make sure the number of rows is 2762 divisible by the blocksize 2763 */ 2764 Mbs = M/bs; 2765 extra_rows = bs - M + bs*(Mbs); 2766 if (extra_rows == bs) extra_rows = 0; 2767 else Mbs++; 2768 if (extra_rows &&!rank) { 2769 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2770 } 2771 2772 /* determine ownership of all rows */ 2773 if (newmat->rmap->n < 
0) { /* PETSC_DECIDE */ 2774 mbs = Mbs/size + ((Mbs % size) > rank); 2775 m = mbs*bs; 2776 } else { /* User Set */ 2777 m = newmat->rmap->n; 2778 mbs = m/bs; 2779 } 2780 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 2781 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2782 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2783 rowners[0] = 0; 2784 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2785 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2786 rstart = rowners[rank]; 2787 rend = rowners[rank+1]; 2788 2789 /* distribute row lengths to all processors */ 2790 ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 2791 if (!rank) { 2792 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 2793 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2794 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2795 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 2796 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2797 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2798 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2799 } else { 2800 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2801 } 2802 2803 if (!rank) { /* procs[0] */ 2804 /* calculate the number of nonzeros on each processor */ 2805 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 2806 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2807 for (i=0; i<size; i++) { 2808 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2809 procsnz[i] += rowlengths[j]; 2810 } 2811 } 2812 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2813 2814 /* determine max buffer needed and allocate it */ 2815 maxnz = 0; 2816 for (i=0; i<size; i++) { 2817 maxnz = PetscMax(maxnz,procsnz[i]); 2818 } 2819 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2820 2821 /* read in my part of the matrix column indices */ 2822 nz = procsnz[0]; 2823 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2824 mycols = ibuf; 2825 if (size == 1) nz -= extra_rows; 2826 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2827 if (size == 1) { 2828 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 2829 } 2830 2831 /* read in every ones (except the last) and ship off */ 2832 for (i=1; i<size-1; i++) { 2833 nz = procsnz[i]; 2834 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2835 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2836 } 2837 /* read in the stuff for the last proc */ 2838 if (size != 1) { 2839 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2840 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2841 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2842 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2843 } 2844 ierr = PetscFree(cols);CHKERRQ(ierr); 2845 } else { /* procs[i], i>0 */ 2846 /* determine buffer space needed for message */ 2847 nz = 0; 2848 for (i=0; i<m; i++) nz += locrowlens[i]; 2849 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2850 mycols = ibuf; 2851 /* receive message of column indices*/ 2852 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2853 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2854 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2855 } 2856 2857 /* loop over local rows, determining number of off diagonal entries */ 2858 ierr = 
PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 2859 ierr = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 2860 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2861 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2862 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2863 rowcount = 0; 2864 nzcount = 0; 2865 for (i=0; i<mbs; i++) { 2866 dcount = 0; 2867 odcount = 0; 2868 for (j=0; j<bs; j++) { 2869 kmax = locrowlens[rowcount]; 2870 for (k=0; k<kmax; k++) { 2871 tmp = mycols[nzcount++]/bs; /* block col. index */ 2872 if (!mask[tmp]) { 2873 mask[tmp] = 1; 2874 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2875 else masked1[dcount++] = tmp; /* entry in diag portion */ 2876 } 2877 } 2878 rowcount++; 2879 } 2880 2881 dlens[i] = dcount; /* d_nzz[i] */ 2882 odlens[i] = odcount; /* o_nzz[i] */ 2883 2884 /* zero out the mask elements we set */ 2885 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2886 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2887 } 2888 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2889 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2890 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2891 2892 if (!rank) { 2893 ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 2894 /* read in my part of the matrix numerical values */ 2895 nz = procsnz[0]; 2896 vals = buf; 2897 mycols = ibuf; 2898 if (size == 1) nz -= extra_rows; 2899 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2900 if (size == 1) { 2901 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 2902 } 2903 2904 /* insert into matrix */ 2905 jj = rstart*bs; 2906 for (i=0; i<m; i++) { 2907 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2908 mycols += locrowlens[i]; 2909 vals += locrowlens[i]; 2910 jj++; 2911 } 2912 2913 /* read in other processors (except the last one) and ship out */ 2914 for (i=1; i<size-1; i++) { 2915 nz = procsnz[i]; 2916 vals = buf; 2917 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2918 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2919 } 2920 /* the last proc */ 2921 if (size != 1) { 2922 nz = procsnz[i] - extra_rows; 2923 vals = buf; 2924 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2925 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2926 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2927 } 2928 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2929 2930 } else { 2931 /* receive numeric values */ 2932 ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 2933 2934 /* receive message of values*/ 2935 vals = buf; 2936 mycols = ibuf; 2937 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2938 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2939 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2940 2941 /* insert into matrix */ 2942 jj = rstart*bs; 2943 for (i=0; i<m; i++) { 2944 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2945 mycols += locrowlens[i]; 2946 vals += locrowlens[i]; 2947 jj++; 2948 } 2949 } 2950 2951 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2952 ierr = PetscFree(buf);CHKERRQ(ierr); 2953 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2954 ierr = 
PetscFree2(rowners,browners);CHKERRQ(ierr); 2955 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2956 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2957 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2958 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2959 PetscFunctionReturn(0); 2960 } 2961 2962 #undef __FUNCT__ 2963 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2964 /*XXXXX@ 2965 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2966 2967 Input Parameters: 2968 . mat - the matrix 2969 . fact - factor 2970 2971 Not Collective on Mat, each process can have a different hash factor 2972 2973 Level: advanced 2974 2975 Notes: 2976 This can also be set by the command line option: -mat_use_hash_table fact 2977 2978 .keywords: matrix, hashtable, factor, HT 2979 2980 .seealso: MatSetOption() 2981 @XXXXX*/ 2982 2983 2984 #undef __FUNCT__ 2985 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2986 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2987 { 2988 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2989 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2990 PetscReal atmp; 2991 PetscReal *work,*svalues,*rvalues; 2992 PetscErrorCode ierr; 2993 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2994 PetscMPIInt rank,size; 2995 PetscInt *rowners_bs,dest,count,source; 2996 PetscScalar *va; 2997 MatScalar *ba; 2998 MPI_Status stat; 2999 3000 PetscFunctionBegin; 3001 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 3002 ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 3003 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 3004 3005 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3006 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3007 3008 bs = A->rmap->bs; 3009 mbs = a->mbs; 3010 Mbs = a->Mbs; 3011 ba = b->a; 3012 bi = b->i; 3013 bj = b->j; 3014 3015 /* find ownerships */ 3016 rowners_bs = A->rmap->range; 3017 3018 /* each proc creates an array to be distributed */ 3019 ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr); 3020 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 3021 3022 /* row_max for B */ 3023 if (rank != size-1) { 3024 for (i=0; i<mbs; i++) { 3025 ncols = bi[1] - bi[0]; bi++; 3026 brow = bs*i; 3027 for (j=0; j<ncols; j++) { 3028 bcol = bs*(*bj); 3029 for (kcol=0; kcol<bs; kcol++) { 3030 col = bcol + kcol; /* local col index */ 3031 col += rowners_bs[rank+1]; /* global col index */ 3032 for (krow=0; krow<bs; krow++) { 3033 atmp = PetscAbsScalar(*ba); ba++; 3034 row = brow + krow; /* local row index */ 3035 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 3036 if (work[col] < atmp) work[col] = atmp; 3037 } 3038 } 3039 bj++; 3040 } 3041 } 3042 3043 /* send values to its owners */ 3044 for (dest=rank+1; dest<size; dest++) { 3045 svalues = work + rowners_bs[dest]; 3046 count = rowners_bs[dest+1]-rowners_bs[dest]; 3047 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 3048 } 3049 } 3050 3051 /* receive values */ 3052 if (rank) { 3053 rvalues = work; 3054 count = rowners_bs[rank+1]-rowners_bs[rank]; 3055 for (source=0; source<rank; source++) { 3056 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 3057 /* process values */ 3058 for (i=0; i<count; i++) { 3059 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 3060 } 3061 } 3062 } 3063 3064 ierr = 
VecRestoreArray(v,&va);CHKERRQ(ierr); 3065 ierr = PetscFree(work);CHKERRQ(ierr); 3066 PetscFunctionReturn(0); 3067 } 3068 3069 #undef __FUNCT__ 3070 #define __FUNCT__ "MatSOR_MPISBAIJ" 3071 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 3072 { 3073 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 3074 PetscErrorCode ierr; 3075 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 3076 PetscScalar *x,*ptr,*from; 3077 Vec bb1; 3078 const PetscScalar *b; 3079 3080 PetscFunctionBegin; 3081 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 3082 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 3083 3084 if (flag == SOR_APPLY_UPPER) { 3085 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3086 PetscFunctionReturn(0); 3087 } 3088 3089 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 3090 if (flag & SOR_ZERO_INITIAL_GUESS) { 3091 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 3092 its--; 3093 } 3094 3095 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 3096 while (its--) { 3097 3098 /* lower triangular part: slvec0b = - B^T*xx */ 3099 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 3100 3101 /* copy xx into slvec0a */ 3102 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 3103 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3104 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 3105 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 3106 3107 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 3108 3109 /* copy bb into slvec1a */ 3110 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 3111 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3112 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 3113 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 3114 3115 /* set slvec1b = 0 */ 3116 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 3117 3118 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3119 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3120 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3121 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3122 3123 /* upper triangular part: bb1 = bb1 - B*x */ 3124 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 3125 3126 /* local diagonal sweep */ 3127 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 3128 } 3129 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 3130 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 3131 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3132 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 3133 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3134 } else if (flag & SOR_EISENSTAT) { 3135 Vec xx1; 3136 PetscBool hasop; 3137 const PetscScalar *diag; 3138 PetscScalar *sl,scale = (omega - 2.0)/omega; 3139 PetscInt i,n; 3140 3141 if (!mat->xx1) { 3142 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 3143 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 3144 } 3145 xx1 = mat->xx1; 3146 bb1 = mat->bb1; 3147 3148 ierr = 
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);

    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
    } else {
      /*
          These two lines are replaced by code that may be a bit faster for a good compiler
      ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
      */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
      if (omega == 1.0) {
        for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply off-diagonal portion of matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}
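
/*
   Editor's note: a minimal sketch (not part of the original source) of how the SOR kernel above
   is usually reached.  Applications rarely call MatSOR() directly; it is normally invoked through
   PCSOR, e.g. with the options

       -pc_type sor -pc_sor_local_symmetric

   A direct call performing one local symmetric sweep with a zero initial guess would look like

     ierr = VecSet(x,0.0);CHKERRQ(ierr);
     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);

   where A is an assembled MATMPISBAIJ matrix with block size 1 (larger block sizes are rejected
   above), and b, x are vectors with the row layout of A.
*/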

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower triangular part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJWithArrays"
/*@
   MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local
   rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
     The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

     The i and j indices are 0 based, and the i indices are offsets into the local j array.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
/*@C
   MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
   (the symmetric parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero) these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
@*/
PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
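
/*
   Editor's note: a minimal usage sketch (not part of the original source) for
   MatCreateMPISBAIJWithArrays() above, which in turn calls MatMPISBAIJSetPreallocationCSR().
   The sketch assumes two MPI ranks, block size 1, and a 4x4 symmetric tridiagonal matrix;
   each rank supplies only the upper triangular part of its two local rows in CSR form.
   The array names i0, j0, a0, i1, j1, a1 are illustrative, and rank is assumed to have been
   obtained from MPI_Comm_rank().

     Mat         A;
     PetscInt    i0[] = {0,2,4}, j0[] = {0,1,1,2};     rank 0 owns global rows 0-1
     PetscScalar a0[] = {2,-1,2,-1};
     PetscInt    i1[] = {0,2,3}, j1[] = {2,3,3};       rank 1 owns global rows 2-3
     PetscScalar a1[] = {2,-1,2};

     if (!rank) {
       ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,4,4,i0,j0,a0,&A);CHKERRQ(ierr);
     } else {
       ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,4,4,i1,j1,a1,&A);CHKERRQ(ierr);
     }
*/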

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data;
    PetscInt     *dnz,*onz,sum,bs,cbs,mbs,Nbs;
    PetscInt     *bindx,rmax=a->rmax,j;

    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    mbs = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
    }
    /* Check sum(n) = Nbs */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs);

    ierr    = MPI_Scan(&mbs,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= mbs;

    ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
    ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<mbs; i++) {
      ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
      nnz  = nnz/bs;
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
    }
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscFree(bindx);CHKERRQ(ierr);

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);

  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
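
/*
   Editor's note: a minimal sketch (not part of the original source) of how the routine above is
   normally reached.  Callers do not invoke MatCreateMPIMatConcatenateSeqMat_MPISBAIJ() directly;
   they call the generic MatCreateMPIMatConcatenateSeqMat(), which (as an assumption of this
   sketch) dispatches to the SBAIJ implementation when the sequential input matrix is of type
   SEQSBAIJ.  Here seqA is a local SEQSBAIJ matrix assumed to exist on every rank of
   PETSC_COMM_WORLD, and C receives the concatenated parallel matrix.

     Mat C;
     ierr = MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
*/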