1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 #if defined(PETSC_HAVE_ELEMENTAL) 8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 9 #endif 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 12 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 13 { 14 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 15 PetscErrorCode ierr; 16 17 PetscFunctionBegin; 18 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 19 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 20 PetscFunctionReturn(0); 21 } 22 23 #undef __FUNCT__ 24 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 25 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 26 { 27 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 28 PetscErrorCode ierr; 29 30 PetscFunctionBegin; 31 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 32 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 33 PetscFunctionReturn(0); 34 } 35 36 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 37 { \ 38 \ 39 brow = row/bs; \ 40 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 41 rmax = aimax[brow]; nrow = ailen[brow]; \ 42 bcol = col/bs; \ 43 ridx = row % bs; cidx = col % bs; \ 44 low = 0; high = nrow; \ 45 while (high-low > 3) { \ 46 t = (low+high)/2; \ 47 if (rp[t] > bcol) high = t; \ 48 else low = t; \ 49 } \ 50 for (_i=low; _i<high; _i++) { \ 51 if (rp[_i] > bcol) break; \ 52 if (rp[_i] == bcol) { \ 53 bap = ap + bs2*_i + bs*cidx + ridx; \ 54 if (addv == ADD_VALUES) *bap += value; \ 55 else *bap = value; \ 56 goto a_noinsert; \ 57 } \ 58 } \ 59 if (a->nonew == 1) goto a_noinsert; \ 60 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 61 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 62 N = nrow++ - 1; \ 63 /* shift up all the later entries in this row */ \ 64 for (ii=N; ii>=_i; ii--) { \ 65 rp[ii+1] = rp[ii]; \ 66 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 67 } \ 68 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 69 rp[_i] = bcol; \ 70 ap[bs2*_i + bs*cidx + ridx] = value; \ 71 A->nonzerostate++;\ 72 a_noinsert:; \ 73 ailen[brow] = nrow; \ 74 } 75 76 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 77 { \ 78 brow = row/bs; \ 79 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 80 rmax = bimax[brow]; nrow = bilen[brow]; \ 81 bcol = col/bs; \ 82 ridx = row % bs; cidx = col % bs; \ 83 low = 0; high = nrow; \ 84 while (high-low > 3) { \ 85 t = (low+high)/2; \ 86 if (rp[t] > bcol) high = t; \ 87 else low = t; \ 88 } \ 89 for (_i=low; _i<high; _i++) { \ 90 if (rp[_i] > bcol) break; \ 91 if (rp[_i] == bcol) { \ 92 bap = ap + bs2*_i + bs*cidx + ridx; \ 93 if (addv == ADD_VALUES) *bap += value; \ 94 else *bap = value; \ 95 goto b_noinsert; \ 96 } \ 97 } \ 98 if (b->nonew == 1) goto b_noinsert; \ 99 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 100 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 101 N = nrow++ - 1; \ 102 /* shift up all the later entries in this row */ \ 103 for (ii=N; ii>=_i; ii--) { \ 104 rp[ii+1] = rp[ii]; \ 105 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 106 } \ 107 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 108 rp[_i] = bcol; \ 109 ap[bs2*_i + bs*cidx + ridx] = value; \ 110 B->nonzerostate++;\ 111 b_noinsert:; \ 112 bilen[brow] = nrow; \ 113 } 114 115 /* Only add/insert a(i,j) with i<=j (blocks). 116 Any a(i,j) with i>j input by user is ingored. 117 */ 118 #undef __FUNCT__ 119 #define __FUNCT__ "MatSetValues_MPISBAIJ" 120 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 121 { 122 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 123 MatScalar value; 124 PetscBool roworiented = baij->roworiented; 125 PetscErrorCode ierr; 126 PetscInt i,j,row,col; 127 PetscInt rstart_orig=mat->rmap->rstart; 128 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 129 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 130 131 /* Some Variables required in the macro */ 132 Mat A = baij->A; 133 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 134 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 135 MatScalar *aa =a->a; 136 137 Mat B = baij->B; 138 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 139 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 140 MatScalar *ba =b->a; 141 142 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 143 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 144 MatScalar *ap,*bap; 145 146 /* for stash */ 147 PetscInt n_loc, *in_loc = NULL; 148 MatScalar *v_loc = NULL; 149 150 PetscFunctionBegin; 151 if (!baij->donotstash) { 152 if (n > baij->n_loc) { 153 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 154 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 155 ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr); 156 ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr); 157 158 baij->n_loc = n; 159 } 160 in_loc = baij->in_loc; 161 v_loc = baij->v_loc; 162 } 163 164 for (i=0; i<m; i++) { 165 if (im[i] < 0) continue; 166 #if defined(PETSC_USE_DEBUG) 167 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 168 #endif 169 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 170 row = im[i] - rstart_orig; /* local row index */ 171 for (j=0; j<n; j++) { 172 if (im[i]/bs > in[j]/bs) { 173 if (a->ignore_ltriangular) { 174 continue; /* ignore lower triangular blocks */ 175 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 176 } 177 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 178 col = in[j] - cstart_orig; /* local col index */ 179 brow = row/bs; bcol = col/bs; 180 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 181 if (roworiented) value = v[i*n+j]; 182 else value = v[i+j*m]; 183 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 184 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 185 } else if (in[j] < 0) continue; 186 #if defined(PETSC_USE_DEBUG) 187 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 188 #endif 189 else { /* off-diag entry (B) */ 190 if (mat->was_assembled) { 191 if (!baij->colmap) { 192 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 193 } 194 #if defined(PETSC_USE_CTABLE) 195 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 196 col = col - 1; 197 #else 198 col = baij->colmap[in[j]/bs] - 1; 199 #endif 200 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 201 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 202 col = in[j]; 203 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 204 B = baij->B; 205 b = (Mat_SeqBAIJ*)(B)->data; 206 bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 207 ba = b->a; 208 } else col += in[j]%bs; 209 } else col = in[j]; 210 if (roworiented) value = v[i*n+j]; 211 else value = v[i+j*m]; 212 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 213 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 214 } 215 } 216 } else { /* off processor entry */ 217 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 218 if (!baij->donotstash) { 219 mat->assembled = PETSC_FALSE; 220 n_loc = 0; 221 for (j=0; j<n; j++) { 222 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 223 in_loc[n_loc] = in[j]; 224 if (roworiented) { 225 v_loc[n_loc] = v[i*n+j]; 226 } else { 227 v_loc[n_loc] = v[j*m+i]; 228 } 229 n_loc++; 230 } 231 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ_Inlined" 240 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 241 { 242 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 243 PetscErrorCode ierr; 244 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 245 PetscInt *imax =a->imax,*ai=a->i,*ailen=a->ilen; 246 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 247 PetscBool roworiented=a->roworiented; 248 const PetscScalar *value = v; 249 MatScalar *ap,*aa = a->a,*bap; 250 251 PetscFunctionBegin; 252 if (col < row) { 253 if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */ 254 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 255 } 256 rp = aj + ai[row]; 257 ap = aa + bs2*ai[row]; 258 rmax = imax[row]; 259 nrow = ailen[row]; 260 value = v; 261 low = 0; 262 high = nrow; 263 264 while (high-low > 7) { 265 t = (low+high)/2; 266 if (rp[t] > col) high = t; 267 else low = t; 268 } 269 for (i=low; i<high; i++) { 270 if (rp[i] > col) break; 271 if (rp[i] == col) { 272 bap = ap + bs2*i; 273 if (roworiented) { 274 if (is == ADD_VALUES) { 275 for (ii=0; ii<bs; ii++) { 276 for (jj=ii; jj<bs2; jj+=bs) { 277 bap[jj] += *value++; 278 } 279 } 280 } else { 281 for (ii=0; ii<bs; ii++) { 282 for (jj=ii; jj<bs2; jj+=bs) { 283 bap[jj] = *value++; 284 } 285 } 286 } 287 } else { 288 if (is == ADD_VALUES) { 289 for (ii=0; ii<bs; ii++) { 290 for (jj=0; jj<bs; jj++) { 291 *bap++ += *value++; 292 } 293 } 294 } else { 295 for (ii=0; ii<bs; ii++) { 296 for (jj=0; jj<bs; jj++) { 297 *bap++ = *value++; 298 } 299 } 300 } 301 } 302 goto noinsert2; 303 } 304 } 305 if (nonew == 1) goto noinsert2; 306 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol); 307 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 308 N = nrow++ - 1; high++; 309 /* shift up all the later entries in this row */ 310 for (ii=N; ii>=i; ii--) { 311 rp[ii+1] = rp[ii]; 312 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 313 } 314 if (N >= i) { 315 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 316 } 317 rp[i] = col; 318 bap = ap + bs2*i; 319 if (roworiented) { 320 for (ii=0; ii<bs; ii++) { 321 for (jj=ii; jj<bs2; jj+=bs) { 322 bap[jj] = *value++; 323 } 324 } 325 } else { 326 for (ii=0; ii<bs; ii++) { 327 for (jj=0; jj<bs; jj++) { 328 *bap++ = *value++; 329 } 330 } 331 } 332 noinsert2:; 333 ailen[row] = nrow; 334 PetscFunctionReturn(0); 335 } 336 337 #undef __FUNCT__ 338 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined" 339 /* 340 This routine is exactly duplicated in mpibaij.c 341 */ 342 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 343 { 344 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 345 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 346 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 347 PetscErrorCode ierr; 348 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 349 PetscBool roworiented=a->roworiented; 350 const PetscScalar *value = v; 351 MatScalar *ap,*aa = a->a,*bap; 352 353 PetscFunctionBegin; 354 rp = aj + ai[row]; 355 ap = aa + bs2*ai[row]; 356 rmax = imax[row]; 357 nrow = ailen[row]; 358 low = 0; 359 high = nrow; 360 value = v; 361 while (high-low > 7) { 362 t = (low+high)/2; 363 if (rp[t] > col) high = t; 364 else low = t; 365 } 366 for (i=low; i<high; i++) { 367 if (rp[i] > col) break; 368 if (rp[i] == col) { 369 bap = ap + bs2*i; 370 if (roworiented) { 371 if (is == ADD_VALUES) { 372 for (ii=0; ii<bs; ii++) { 373 for (jj=ii; jj<bs2; jj+=bs) { 374 bap[jj] += *value++; 375 } 376 } 377 } else { 378 for (ii=0; ii<bs; ii++) { 379 for (jj=ii; jj<bs2; jj+=bs) { 380 bap[jj] = *value++; 381 } 382 } 383 } 384 } else { 385 if (is == ADD_VALUES) { 386 for (ii=0; ii<bs; ii++,value+=bs) { 387 for (jj=0; jj<bs; jj++) { 388 bap[jj] += value[jj]; 389 } 390 bap += bs; 391 } 392 } else { 393 for (ii=0; ii<bs; ii++,value+=bs) { 394 for (jj=0; jj<bs; jj++) { 395 bap[jj] = value[jj]; 396 } 397 bap += bs; 398 } 399 } 400 } 401 goto noinsert2; 402 } 403 } 404 if (nonew == 1) goto noinsert2; 405 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol); 406 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 407 N = nrow++ - 1; high++; 408 /* shift up all the later entries in this row */ 409 for (ii=N; ii>=i; ii--) { 410 rp[ii+1] = rp[ii]; 411 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 412 } 413 if (N >= i) { 414 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 415 } 416 rp[i] = col; 417 bap = ap + bs2*i; 418 if (roworiented) { 419 for (ii=0; ii<bs; ii++) { 420 for (jj=ii; jj<bs2; jj+=bs) { 421 bap[jj] = *value++; 422 } 423 } 424 } else { 425 for (ii=0; ii<bs; ii++) { 426 for (jj=0; jj<bs; jj++) { 427 *bap++ = *value++; 428 } 429 } 430 } 431 noinsert2:; 432 ailen[row] = nrow; 433 PetscFunctionReturn(0); 434 } 435 436 #undef __FUNCT__ 437 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 438 /* 439 This routine could be optimized by removing the need for the block copy below and passing stride information 440 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 441 */ 442 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 443 { 444 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 445 const MatScalar *value; 446 MatScalar *barray =baij->barray; 447 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 448 PetscErrorCode ierr; 449 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 450 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 451 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 452 453 PetscFunctionBegin; 454 if (!barray) { 455 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 456 baij->barray = barray; 457 } 458 459 if (roworiented) { 460 stepval = (n-1)*bs; 461 } else { 462 stepval = (m-1)*bs; 463 } 464 for (i=0; i<m; i++) { 465 if (im[i] < 0) continue; 466 #if defined(PETSC_USE_DEBUG) 467 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1); 468 #endif 469 if (im[i] >= rstart && im[i] < rend) { 470 row = im[i] - rstart; 471 for (j=0; j<n; j++) { 472 if (im[i] > in[j]) { 473 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 474 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 475 } 476 /* If NumCol = 1 then a copy is not required */ 477 if ((roworiented) && (n == 1)) { 478 barray = (MatScalar*) v + i*bs2; 479 } else if ((!roworiented) && (m == 1)) { 480 barray = (MatScalar*) v + j*bs2; 481 } else { /* Here a copy is required */ 482 if (roworiented) { 483 value = v + i*(stepval+bs)*bs + j*bs; 484 } else { 485 value = v + j*(stepval+bs)*bs + i*bs; 486 } 487 for (ii=0; ii<bs; ii++,value+=stepval) { 488 for (jj=0; jj<bs; jj++) { 489 *barray++ = *value++; 490 } 491 } 492 barray -=bs2; 493 } 494 495 if (in[j] >= cstart && in[j] < cend) { 496 col = in[j] - cstart; 497 ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 498 } else if (in[j] < 0) continue; 499 #if defined(PETSC_USE_DEBUG) 500 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1); 501 #endif 502 else { 503 if (mat->was_assembled) { 504 if (!baij->colmap) { 505 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 506 } 507 508 #if defined(PETSC_USE_DEBUG) 509 #if defined(PETSC_USE_CTABLE) 510 { PetscInt data; 511 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 512 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 513 } 514 #else 515 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 516 #endif 517 #endif 518 #if defined(PETSC_USE_CTABLE) 519 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 520 col = (col - 1)/bs; 521 #else 522 col = (baij->colmap[in[j]] - 1)/bs; 523 #endif 524 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 525 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 526 col = in[j]; 527 } 528 } else col = in[j]; 529 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 530 } 531 } 532 } else { 533 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 534 if (!baij->donotstash) { 535 if (roworiented) { 536 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 537 } else { 538 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 539 } 540 } 541 } 542 } 543 PetscFunctionReturn(0); 544 } 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatGetValues_MPISBAIJ" 548 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 549 { 550 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 551 PetscErrorCode ierr; 552 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 553 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 554 555 PetscFunctionBegin; 556 for (i=0; i<m; i++) { 557 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 558 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 559 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 560 row = idxm[i] - bsrstart; 561 for (j=0; j<n; j++) { 562 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 563 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 564 if (idxn[j] >= bscstart && idxn[j] < bscend) { 565 col = idxn[j] - bscstart; 566 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 567 } else { 568 if (!baij->colmap) { 569 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 570 } 571 #if defined(PETSC_USE_CTABLE) 572 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 573 data--; 574 #else 575 data = baij->colmap[idxn[j]/bs]-1; 576 #endif 577 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 578 else { 579 col = data + idxn[j]%bs; 580 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 581 } 582 } 583 } 584 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 585 } 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatNorm_MPISBAIJ" 591 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 592 { 593 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 594 PetscErrorCode ierr; 595 PetscReal sum[2],*lnorm2; 596 597 PetscFunctionBegin; 598 if (baij->size == 1) { 599 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 600 } else { 601 if (type == NORM_FROBENIUS) { 602 ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr); 603 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 604 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 605 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 606 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 607 ierr = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 608 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 609 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 610 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 611 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 612 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 613 PetscReal *rsum,*rsum2,vabs; 614 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 615 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 616 MatScalar *v; 617 618 ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr); 619 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 620 /* Amat */ 621 v = amat->a; jj = amat->j; 622 for (brow=0; brow<mbs; brow++) { 623 grow = bs*(rstart + brow); 624 nz = amat->i[brow+1] - amat->i[brow]; 625 for (bcol=0; bcol<nz; bcol++) { 626 gcol = bs*(rstart + *jj); jj++; 627 for (col=0; col<bs; col++) { 628 for (row=0; row<bs; row++) { 629 vabs = PetscAbsScalar(*v); v++; 630 rsum[gcol+col] += vabs; 631 /* non-diagonal block */ 632 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 633 } 634 } 635 } 636 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 637 } 638 /* Bmat */ 639 v = bmat->a; jj = bmat->j; 640 for (brow=0; brow<mbs; brow++) { 641 grow = bs*(rstart + brow); 642 nz = bmat->i[brow+1] - bmat->i[brow]; 643 for (bcol=0; bcol<nz; bcol++) { 644 gcol = bs*garray[*jj]; jj++; 645 for (col=0; col<bs; col++) { 646 for (row=0; row<bs; row++) { 647 vabs = PetscAbsScalar(*v); v++; 648 rsum[gcol+col] += vabs; 649 rsum[grow+row] += vabs; 650 } 651 } 652 } 653 ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 654 } 655 ierr = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 656 *norm = 0.0; 657 for (col=0; col<mat->cmap->N; col++) { 658 if (rsum2[col] > *norm) *norm = rsum2[col]; 659 } 660 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 661 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 662 } 663 PetscFunctionReturn(0); 664 } 665 666 #undef __FUNCT__ 667 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 668 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 669 { 670 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 671 PetscErrorCode ierr; 672 PetscInt nstash,reallocs; 673 674 PetscFunctionBegin; 675 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 676 677 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 678 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 679 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 680 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 681 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 682 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 683 PetscFunctionReturn(0); 684 } 685 686 #undef __FUNCT__ 687 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 688 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 689 { 690 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 691 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data; 692 PetscErrorCode ierr; 693 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 694 PetscInt *row,*col; 695 PetscBool other_disassembled; 696 PetscMPIInt n; 697 PetscBool r1,r2,r3; 698 MatScalar *val; 699 700 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 701 PetscFunctionBegin; 702 if (!baij->donotstash && !mat->nooffprocentries) { 703 while (1) { 704 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 705 if (!flg) break; 706 707 for (i=0; i<n;) { 708 /* Now identify the consecutive vals belonging to the same row */ 709 for (j=i,rstart=row[j]; j<n; j++) { 710 if (row[j] != rstart) break; 711 } 712 if (j < n) ncols = j-i; 713 else ncols = n-i; 714 /* Now assemble all these values with a single function call */ 715 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 716 i = j; 717 } 718 } 719 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 720 /* Now process the block-stash. Since the values are stashed column-oriented, 721 set the roworiented flag to column oriented, and after MatSetValues() 722 restore the original flags */ 723 r1 = baij->roworiented; 724 r2 = a->roworiented; 725 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 726 727 baij->roworiented = PETSC_FALSE; 728 a->roworiented = PETSC_FALSE; 729 730 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 731 while (1) { 732 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 733 if (!flg) break; 734 735 for (i=0; i<n;) { 736 /* Now identify the consecutive vals belonging to the same row */ 737 for (j=i,rstart=row[j]; j<n; j++) { 738 if (row[j] != rstart) break; 739 } 740 if (j < n) ncols = j-i; 741 else ncols = n-i; 742 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr); 743 i = j; 744 } 745 } 746 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 747 748 baij->roworiented = r1; 749 a->roworiented = r2; 750 751 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 752 } 753 754 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 755 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 756 757 /* determine if any processor has disassembled, if so we must 758 also disassemble ourselfs, in order that we may reassemble. */ 759 /* 760 if nonzero structure of submatrix B cannot change then we know that 761 no processor disassembled thus we can skip this stuff 762 */ 763 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 764 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 765 if (mat->was_assembled && !other_disassembled) { 766 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 767 } 768 } 769 770 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 771 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 772 } 773 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 774 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 775 776 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 777 778 baij->rowvalues = 0; 779 780 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 781 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 782 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 783 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 784 } 785 PetscFunctionReturn(0); 786 } 787 788 extern PetscErrorCode MatView_SeqSBAIJ(Mat,PetscViewer); 789 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 790 #include <petscdraw.h> 791 #undef __FUNCT__ 792 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 793 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 794 { 795 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 796 PetscErrorCode ierr; 797 PetscInt bs = mat->rmap->bs; 798 PetscMPIInt rank = baij->rank; 799 PetscBool iascii,isdraw; 800 PetscViewer sviewer; 801 PetscViewerFormat format; 802 803 PetscFunctionBegin; 804 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 805 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 806 if (iascii) { 807 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 808 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 809 MatInfo info; 810 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 811 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 812 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 813 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 814 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 815 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 816 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 817 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 818 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 819 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 820 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 821 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 822 PetscFunctionReturn(0); 823 } else if (format == PETSC_VIEWER_ASCII_INFO) { 824 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 825 PetscFunctionReturn(0); 826 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 827 PetscFunctionReturn(0); 828 } 829 } 830 831 if (isdraw) { 832 PetscDraw draw; 833 PetscBool isnull; 834 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 835 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 836 if (isnull) PetscFunctionReturn(0); 837 } 838 839 { 840 /* assemble the entire matrix onto first processor. */ 841 Mat A; 842 Mat_SeqSBAIJ *Aloc; 843 Mat_SeqBAIJ *Bloc; 844 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 845 MatScalar *a; 846 const char *matname; 847 848 /* Should this be the same type as mat? */ 849 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 850 if (!rank) { 851 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 852 } else { 853 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 854 } 855 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 856 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 857 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 858 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 859 860 /* copy over the A part */ 861 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 862 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 863 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 864 865 for (i=0; i<mbs; i++) { 866 rvals[0] = bs*(baij->rstartbs + i); 867 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 868 for (j=ai[i]; j<ai[i+1]; j++) { 869 col = (baij->cstartbs+aj[j])*bs; 870 for (k=0; k<bs; k++) { 871 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 872 col++; 873 a += bs; 874 } 875 } 876 } 877 /* copy over the B part */ 878 Bloc = (Mat_SeqBAIJ*)baij->B->data; 879 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 880 for (i=0; i<mbs; i++) { 881 882 rvals[0] = bs*(baij->rstartbs + i); 883 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 884 for (j=ai[i]; j<ai[i+1]; j++) { 885 col = baij->garray[aj[j]]*bs; 886 for (k=0; k<bs; k++) { 887 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 888 col++; 889 a += bs; 890 } 891 } 892 } 893 ierr = PetscFree(rvals);CHKERRQ(ierr); 894 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 895 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 896 /* 897 Everyone has to call to draw the matrix since the graphics waits are 898 synchronized across all processors that share the PetscDraw object 899 */ 900 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 901 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 902 if (!rank) { 903 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 904 ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 905 } 906 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 907 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 908 ierr = MatDestroy(&A);CHKERRQ(ierr); 909 } 910 PetscFunctionReturn(0); 911 } 912 913 #undef __FUNCT__ 914 #define __FUNCT__ "MatView_MPISBAIJ_Binary" 915 static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer) 916 { 917 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)mat->data; 918 Mat_SeqSBAIJ *A = (Mat_SeqSBAIJ*)a->A->data; 919 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data; 920 PetscErrorCode ierr; 921 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 922 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 923 int fd; 924 PetscScalar *column_values; 925 FILE *file; 926 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 927 PetscInt message_count,flowcontrolcount; 928 929 PetscFunctionBegin; 930 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 931 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 932 nz = bs2*(A->nz + B->nz); 933 rlen = mat->rmap->n; 934 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 935 if (!rank) { 936 header[0] = MAT_FILE_CLASSID; 937 header[1] = mat->rmap->N; 938 header[2] = mat->cmap->N; 939 940 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 941 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 942 /* get largest number of rows any processor has */ 943 range = mat->rmap->range; 944 for (i=1; i<size; i++) { 945 rlen = PetscMax(rlen,range[i+1] - range[i]); 946 } 947 } else { 948 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 949 } 950 951 ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr); 952 /* compute lengths of each row */ 953 for (i=0; i<a->mbs; i++) { 954 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 955 } 956 /* store the row lengths to the file */ 957 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 958 if (!rank) { 959 MPI_Status status; 960 ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr); 961 rlen = (range[1] - range[0])/bs; 962 for (i=0; i<rlen; i++) { 963 for (j=0; j<bs; j++) { 964 row_lens[i*bs+j] = bs*crow_lens[i]; 965 } 966 } 967 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 968 for (i=1; i<size; i++) { 969 rlen = (range[i+1] - range[i])/bs; 970 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 971 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 972 for (k=0; k<rlen; k++) { 973 for (j=0; j<bs; j++) { 974 row_lens[k*bs+j] = bs*crow_lens[k]; 975 } 976 } 977 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 978 } 979 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 980 ierr = PetscFree(row_lens);CHKERRQ(ierr); 981 } else { 982 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 983 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 984 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 985 } 986 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 987 988 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 989 information needed to make it for each row from a block row. This does require more communication but still not more than 990 the communication needed for the nonzero values */ 991 nzmax = nz; /* space a largest processor needs */ 992 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 993 ierr = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr); 994 cnt = 0; 995 for (i=0; i<a->mbs; i++) { 996 pcnt = cnt; 997 for (j=B->i[i]; j<B->i[i+1]; j++) { 998 if ((col = garray[B->j[j]]) > cstart) break; 999 for (l=0; l<bs; l++) { 1000 column_indices[cnt++] = bs*col+l; 1001 } 1002 } 1003 for (k=A->i[i]; k<A->i[i+1]; k++) { 1004 for (l=0; l<bs; l++) { 1005 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1006 } 1007 } 1008 for (; j<B->i[i+1]; j++) { 1009 for (l=0; l<bs; l++) { 1010 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1011 } 1012 } 1013 len = cnt - pcnt; 1014 for (k=1; k<bs; k++) { 1015 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1016 cnt += len; 1017 } 1018 } 1019 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1020 1021 /* store the columns to the file */ 1022 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1023 if (!rank) { 1024 MPI_Status status; 1025 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1026 for (i=1; i<size; i++) { 1027 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1028 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1029 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1030 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1031 } 1032 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1033 } else { 1034 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1035 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1036 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1037 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1038 } 1039 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1040 1041 /* load up the numerical values */ 1042 ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr); 1043 cnt = 0; 1044 for (i=0; i<a->mbs; i++) { 1045 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1046 for (j=B->i[i]; j<B->i[i+1]; j++) { 1047 if (garray[B->j[j]] > cstart) break; 1048 for (l=0; l<bs; l++) { 1049 for (ll=0; ll<bs; ll++) { 1050 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1051 } 1052 } 1053 cnt += bs; 1054 } 1055 for (k=A->i[i]; k<A->i[i+1]; k++) { 1056 for (l=0; l<bs; l++) { 1057 for (ll=0; ll<bs; ll++) { 1058 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1059 } 1060 } 1061 cnt += bs; 1062 } 1063 for (; j<B->i[i+1]; j++) { 1064 for (l=0; l<bs; l++) { 1065 for (ll=0; ll<bs; ll++) { 1066 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1067 } 1068 } 1069 cnt += bs; 1070 } 1071 cnt += (bs-1)*rlen; 1072 } 1073 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1074 1075 /* store the column values to the file */ 1076 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1077 if (!rank) { 1078 MPI_Status status; 1079 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1080 for (i=1; i<size; i++) { 1081 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1082 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1083 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1084 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1085 } 1086 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1087 } else { 1088 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1089 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1090 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1091 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1092 } 1093 ierr = PetscFree(column_values);CHKERRQ(ierr); 1094 1095 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1096 if (file) { 1097 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1098 } 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatView_MPISBAIJ" 1104 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 1105 { 1106 PetscErrorCode ierr; 1107 PetscBool iascii,isdraw,issocket,isbinary; 1108 1109 PetscFunctionBegin; 1110 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1111 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1112 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1113 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1114 if (iascii || isdraw || issocket) { 1115 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1116 } else if (isbinary) { 1117 ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1118 } 1119 PetscFunctionReturn(0); 1120 } 1121 1122 #undef __FUNCT__ 1123 #define __FUNCT__ "MatDestroy_MPISBAIJ" 1124 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 1125 { 1126 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1127 PetscErrorCode ierr; 1128 1129 PetscFunctionBegin; 1130 #if defined(PETSC_USE_LOG) 1131 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1132 #endif 1133 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1134 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1135 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1136 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1137 #if defined(PETSC_USE_CTABLE) 1138 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1139 #else 1140 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1141 #endif 1142 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1143 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1144 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1145 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 1146 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 1147 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 1148 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 1149 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 1150 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 1151 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1152 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1153 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 1154 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 1155 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 1156 ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 1157 #if defined(PETSC_USE_REAL_MAT_SINGLE) 1158 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 1159 #endif 1160 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 1161 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 1162 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1163 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1164 1165 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1166 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1167 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1168 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1169 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr); 1170 #if defined(PETSC_HAVE_ELEMENTAL) 1171 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr); 1172 #endif 1173 PetscFunctionReturn(0); 1174 } 1175 1176 #undef __FUNCT__ 1177 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian" 1178 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 1179 { 1180 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1181 PetscErrorCode ierr; 1182 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 1183 PetscScalar *from; 1184 const PetscScalar *x; 1185 1186 PetscFunctionBegin; 1187 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1188 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1189 1190 /* diagonal part */ 1191 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1192 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1193 1194 /* subdiagonal part */ 1195 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1196 1197 /* copy x into the vec slvec0 */ 1198 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1199 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1200 1201 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1202 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1203 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1204 1205 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1206 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1207 /* supperdiagonal part */ 1208 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1209 PetscFunctionReturn(0); 1210 } 1211 1212 #undef __FUNCT__ 1213 #define __FUNCT__ "MatMult_MPISBAIJ" 1214 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 1215 { 1216 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1217 PetscErrorCode ierr; 1218 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 1219 PetscScalar *from; 1220 const PetscScalar *x; 1221 1222 PetscFunctionBegin; 1223 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1224 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1225 1226 /* diagonal part */ 1227 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1228 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1229 1230 /* subdiagonal part */ 1231 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1232 1233 /* copy x into the vec slvec0 */ 1234 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1235 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1236 1237 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1238 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1239 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1240 1241 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1242 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1243 /* supperdiagonal part */ 1244 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1245 PetscFunctionReturn(0); 1246 } 1247 1248 #undef __FUNCT__ 1249 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 1250 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 1251 { 1252 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1253 PetscErrorCode ierr; 1254 PetscInt nt; 1255 1256 PetscFunctionBegin; 1257 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1258 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1259 1260 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1261 if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1262 1263 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1264 /* do diagonal part */ 1265 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1266 /* do supperdiagonal part */ 1267 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1268 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1269 /* do subdiagonal part */ 1270 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1271 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1272 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1273 PetscFunctionReturn(0); 1274 } 1275 1276 #undef __FUNCT__ 1277 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 1278 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1279 { 1280 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1281 PetscErrorCode ierr; 1282 PetscInt mbs=a->mbs,bs=A->rmap->bs; 1283 PetscScalar *from,zero=0.0; 1284 const PetscScalar *x; 1285 1286 PetscFunctionBegin; 1287 /* 1288 PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n"); 1289 PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT); 1290 */ 1291 /* diagonal part */ 1292 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1293 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 1294 1295 /* subdiagonal part */ 1296 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1297 1298 /* copy x into the vec slvec0 */ 1299 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1300 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1301 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1302 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1303 1304 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1305 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1306 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1307 1308 /* supperdiagonal part */ 1309 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1310 PetscFunctionReturn(0); 1311 } 1312 1313 #undef __FUNCT__ 1314 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 1315 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1316 { 1317 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1318 PetscErrorCode ierr; 1319 1320 PetscFunctionBegin; 1321 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1322 /* do diagonal part */ 1323 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1324 /* do supperdiagonal part */ 1325 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1326 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1327 1328 /* do subdiagonal part */ 1329 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1330 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1331 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1332 PetscFunctionReturn(0); 1333 } 1334 1335 /* 1336 This only works correctly for square matrices where the subblock A->A is the 1337 diagonal block 1338 */ 1339 #undef __FUNCT__ 1340 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 1341 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1342 { 1343 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1344 PetscErrorCode ierr; 1345 1346 PetscFunctionBegin; 1347 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1348 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1349 PetscFunctionReturn(0); 1350 } 1351 1352 #undef __FUNCT__ 1353 #define __FUNCT__ "MatScale_MPISBAIJ" 1354 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1355 { 1356 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1357 PetscErrorCode ierr; 1358 1359 PetscFunctionBegin; 1360 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1361 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1362 PetscFunctionReturn(0); 1363 } 1364 1365 #undef __FUNCT__ 1366 #define __FUNCT__ "MatGetRow_MPISBAIJ" 1367 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1368 { 1369 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1370 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1371 PetscErrorCode ierr; 1372 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1373 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1374 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1375 1376 PetscFunctionBegin; 1377 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1378 mat->getrowactive = PETSC_TRUE; 1379 1380 if (!mat->rowvalues && (idx || v)) { 1381 /* 1382 allocate enough space to hold information from the longest row. 1383 */ 1384 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1385 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1386 PetscInt max = 1,mbs = mat->mbs,tmp; 1387 for (i=0; i<mbs; i++) { 1388 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1389 if (max < tmp) max = tmp; 1390 } 1391 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1392 } 1393 1394 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1395 lrow = row - brstart; /* local row index */ 1396 1397 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1398 if (!v) {pvA = 0; pvB = 0;} 1399 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1400 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1401 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1402 nztot = nzA + nzB; 1403 1404 cmap = mat->garray; 1405 if (v || idx) { 1406 if (nztot) { 1407 /* Sort by increasing column numbers, assuming A and B already sorted */ 1408 PetscInt imark = -1; 1409 if (v) { 1410 *v = v_p = mat->rowvalues; 1411 for (i=0; i<nzB; i++) { 1412 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1413 else break; 1414 } 1415 imark = i; 1416 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1417 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1418 } 1419 if (idx) { 1420 *idx = idx_p = mat->rowindices; 1421 if (imark > -1) { 1422 for (i=0; i<imark; i++) { 1423 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1424 } 1425 } else { 1426 for (i=0; i<nzB; i++) { 1427 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1428 else break; 1429 } 1430 imark = i; 1431 } 1432 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1433 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1434 } 1435 } else { 1436 if (idx) *idx = 0; 1437 if (v) *v = 0; 1438 } 1439 } 1440 *nz = nztot; 1441 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1442 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1443 PetscFunctionReturn(0); 1444 } 1445 1446 #undef __FUNCT__ 1447 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1448 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1449 { 1450 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1451 1452 PetscFunctionBegin; 1453 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1454 baij->getrowactive = PETSC_FALSE; 1455 PetscFunctionReturn(0); 1456 } 1457 1458 #undef __FUNCT__ 1459 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1460 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1461 { 1462 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1463 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1464 1465 PetscFunctionBegin; 1466 aA->getrow_utriangular = PETSC_TRUE; 1467 PetscFunctionReturn(0); 1468 } 1469 #undef __FUNCT__ 1470 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1471 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1472 { 1473 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1474 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1475 1476 PetscFunctionBegin; 1477 aA->getrow_utriangular = PETSC_FALSE; 1478 PetscFunctionReturn(0); 1479 } 1480 1481 #undef __FUNCT__ 1482 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1483 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1484 { 1485 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1486 PetscErrorCode ierr; 1487 1488 PetscFunctionBegin; 1489 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1490 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1491 PetscFunctionReturn(0); 1492 } 1493 1494 #undef __FUNCT__ 1495 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1496 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1497 { 1498 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1499 PetscErrorCode ierr; 1500 1501 PetscFunctionBegin; 1502 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1503 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1504 PetscFunctionReturn(0); 1505 } 1506 1507 /* Check if isrow is a subset of iscol_local, called by MatGetSubMatrix_MPISBAIJ() 1508 Input: isrow - distributed(parallel), 1509 iscol_local - locally owned (seq) 1510 */ 1511 #undef __FUNCT__ 1512 #define __FUNCT__ "ISEqual_private" 1513 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg) 1514 { 1515 PetscErrorCode ierr; 1516 PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch; 1517 const PetscInt *ptr1,*ptr2; 1518 1519 PetscFunctionBegin; 1520 ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr); 1521 ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr); 1522 if (sz1 > sz2) { 1523 *flg = PETSC_FALSE; 1524 PetscFunctionReturn(0); 1525 } 1526 1527 ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr); 1528 ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1529 1530 ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr); 1531 ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr); 1532 ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr); 1533 ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr); 1534 ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr); 1535 ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr); 1536 1537 nmatch=0; 1538 k = 0; 1539 for (i=0; i<sz1; i++){ 1540 for (j=k; j<sz2; j++){ 1541 if (a1[i] == a2[j]) { 1542 k = j; nmatch++; 1543 break; 1544 } 1545 } 1546 } 1547 ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr); 1548 ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1549 ierr = PetscFree(a1);CHKERRQ(ierr); 1550 ierr = PetscFree(a2);CHKERRQ(ierr); 1551 if (nmatch < sz1) { 1552 *flg = PETSC_FALSE; 1553 } else { 1554 *flg = PETSC_TRUE; 1555 } 1556 PetscFunctionReturn(0); 1557 } 1558 1559 #undef __FUNCT__ 1560 #define __FUNCT__ "MatGetSubMatrix_MPISBAIJ" 1561 PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1562 { 1563 PetscErrorCode ierr; 1564 IS iscol_local; 1565 PetscInt csize; 1566 PetscBool isequal; 1567 1568 PetscFunctionBegin; 1569 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1570 if (call == MAT_REUSE_MATRIX) { 1571 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1572 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1573 } else { 1574 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1575 ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr); 1576 if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow"); 1577 } 1578 1579 /* now call MatGetSubMatrix_MPIBAIJ() */ 1580 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1581 if (call == MAT_INITIAL_MATRIX) { 1582 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1583 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1584 } 1585 PetscFunctionReturn(0); 1586 } 1587 1588 #undef __FUNCT__ 1589 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1590 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1591 { 1592 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1593 PetscErrorCode ierr; 1594 1595 PetscFunctionBegin; 1596 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1597 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1598 PetscFunctionReturn(0); 1599 } 1600 1601 #undef __FUNCT__ 1602 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1603 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1604 { 1605 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1606 Mat A = a->A,B = a->B; 1607 PetscErrorCode ierr; 1608 PetscReal isend[5],irecv[5]; 1609 1610 PetscFunctionBegin; 1611 info->block_size = (PetscReal)matin->rmap->bs; 1612 1613 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1614 1615 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1616 isend[3] = info->memory; isend[4] = info->mallocs; 1617 1618 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1619 1620 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1621 isend[3] += info->memory; isend[4] += info->mallocs; 1622 if (flag == MAT_LOCAL) { 1623 info->nz_used = isend[0]; 1624 info->nz_allocated = isend[1]; 1625 info->nz_unneeded = isend[2]; 1626 info->memory = isend[3]; 1627 info->mallocs = isend[4]; 1628 } else if (flag == MAT_GLOBAL_MAX) { 1629 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1630 1631 info->nz_used = irecv[0]; 1632 info->nz_allocated = irecv[1]; 1633 info->nz_unneeded = irecv[2]; 1634 info->memory = irecv[3]; 1635 info->mallocs = irecv[4]; 1636 } else if (flag == MAT_GLOBAL_SUM) { 1637 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1638 1639 info->nz_used = irecv[0]; 1640 info->nz_allocated = irecv[1]; 1641 info->nz_unneeded = irecv[2]; 1642 info->memory = irecv[3]; 1643 info->mallocs = irecv[4]; 1644 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1645 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1646 info->fill_ratio_needed = 0; 1647 info->factor_mallocs = 0; 1648 PetscFunctionReturn(0); 1649 } 1650 1651 #undef __FUNCT__ 1652 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1653 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1654 { 1655 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1656 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1657 PetscErrorCode ierr; 1658 1659 PetscFunctionBegin; 1660 switch (op) { 1661 case MAT_NEW_NONZERO_LOCATIONS: 1662 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1663 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1664 case MAT_KEEP_NONZERO_PATTERN: 1665 case MAT_SUBMAT_SINGLEIS: 1666 case MAT_NEW_NONZERO_LOCATION_ERR: 1667 MatCheckPreallocated(A,1); 1668 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1669 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1670 break; 1671 case MAT_ROW_ORIENTED: 1672 MatCheckPreallocated(A,1); 1673 a->roworiented = flg; 1674 1675 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1676 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1677 break; 1678 case MAT_NEW_DIAGONALS: 1679 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1680 break; 1681 case MAT_IGNORE_OFF_PROC_ENTRIES: 1682 a->donotstash = flg; 1683 break; 1684 case MAT_USE_HASH_TABLE: 1685 a->ht_flag = flg; 1686 break; 1687 case MAT_HERMITIAN: 1688 MatCheckPreallocated(A,1); 1689 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1690 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1691 1692 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1693 break; 1694 case MAT_SPD: 1695 A->spd_set = PETSC_TRUE; 1696 A->spd = flg; 1697 if (flg) { 1698 A->symmetric = PETSC_TRUE; 1699 A->structurally_symmetric = PETSC_TRUE; 1700 A->symmetric_set = PETSC_TRUE; 1701 A->structurally_symmetric_set = PETSC_TRUE; 1702 } 1703 break; 1704 case MAT_SYMMETRIC: 1705 MatCheckPreallocated(A,1); 1706 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1707 break; 1708 case MAT_STRUCTURALLY_SYMMETRIC: 1709 MatCheckPreallocated(A,1); 1710 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1711 break; 1712 case MAT_SYMMETRY_ETERNAL: 1713 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1714 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1715 break; 1716 case MAT_IGNORE_LOWER_TRIANGULAR: 1717 aA->ignore_ltriangular = flg; 1718 break; 1719 case MAT_ERROR_LOWER_TRIANGULAR: 1720 aA->ignore_ltriangular = flg; 1721 break; 1722 case MAT_GETROW_UPPERTRIANGULAR: 1723 aA->getrow_utriangular = flg; 1724 break; 1725 default: 1726 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1727 } 1728 PetscFunctionReturn(0); 1729 } 1730 1731 #undef __FUNCT__ 1732 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1733 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1734 { 1735 PetscErrorCode ierr; 1736 1737 PetscFunctionBegin; 1738 if (reuse == MAT_INITIAL_MATRIX) { 1739 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1740 } else if (reuse == MAT_REUSE_MATRIX) { 1741 ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1742 } 1743 PetscFunctionReturn(0); 1744 } 1745 1746 #undef __FUNCT__ 1747 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1748 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1749 { 1750 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1751 Mat a = baij->A, b=baij->B; 1752 PetscErrorCode ierr; 1753 PetscInt nv,m,n; 1754 PetscBool flg; 1755 1756 PetscFunctionBegin; 1757 if (ll != rr) { 1758 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1759 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1760 } 1761 if (!ll) PetscFunctionReturn(0); 1762 1763 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1764 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1765 1766 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1767 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1768 1769 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1770 1771 /* left diagonalscale the off-diagonal part */ 1772 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1773 1774 /* scale the diagonal part */ 1775 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1776 1777 /* right diagonalscale the off-diagonal part */ 1778 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1779 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1780 PetscFunctionReturn(0); 1781 } 1782 1783 #undef __FUNCT__ 1784 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1785 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1786 { 1787 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1788 PetscErrorCode ierr; 1789 1790 PetscFunctionBegin; 1791 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1792 PetscFunctionReturn(0); 1793 } 1794 1795 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1796 1797 #undef __FUNCT__ 1798 #define __FUNCT__ "MatEqual_MPISBAIJ" 1799 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1800 { 1801 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1802 Mat a,b,c,d; 1803 PetscBool flg; 1804 PetscErrorCode ierr; 1805 1806 PetscFunctionBegin; 1807 a = matA->A; b = matA->B; 1808 c = matB->A; d = matB->B; 1809 1810 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1811 if (flg) { 1812 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1813 } 1814 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1815 PetscFunctionReturn(0); 1816 } 1817 1818 #undef __FUNCT__ 1819 #define __FUNCT__ "MatCopy_MPISBAIJ" 1820 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1821 { 1822 PetscErrorCode ierr; 1823 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1824 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1825 1826 PetscFunctionBegin; 1827 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1828 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1829 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1830 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1831 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1832 } else { 1833 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1834 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1835 } 1836 PetscFunctionReturn(0); 1837 } 1838 1839 #undef __FUNCT__ 1840 #define __FUNCT__ "MatSetUp_MPISBAIJ" 1841 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1842 { 1843 PetscErrorCode ierr; 1844 1845 PetscFunctionBegin; 1846 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1847 PetscFunctionReturn(0); 1848 } 1849 1850 #undef __FUNCT__ 1851 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1852 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1853 { 1854 PetscErrorCode ierr; 1855 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1856 PetscBLASInt bnz,one=1; 1857 Mat_SeqSBAIJ *xa,*ya; 1858 Mat_SeqBAIJ *xb,*yb; 1859 1860 PetscFunctionBegin; 1861 if (str == SAME_NONZERO_PATTERN) { 1862 PetscScalar alpha = a; 1863 xa = (Mat_SeqSBAIJ*)xx->A->data; 1864 ya = (Mat_SeqSBAIJ*)yy->A->data; 1865 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1866 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1867 xb = (Mat_SeqBAIJ*)xx->B->data; 1868 yb = (Mat_SeqBAIJ*)yy->B->data; 1869 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1870 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1871 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1872 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1873 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1874 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1875 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 1876 } else { 1877 Mat B; 1878 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1879 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1880 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1881 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 1882 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1883 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1884 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1885 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1886 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1887 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1888 ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 1889 ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1890 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1891 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1892 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1893 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 1894 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1895 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1896 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1897 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1898 } 1899 PetscFunctionReturn(0); 1900 } 1901 1902 #undef __FUNCT__ 1903 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1904 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1905 { 1906 PetscErrorCode ierr; 1907 PetscInt i; 1908 PetscBool flg; 1909 1910 PetscFunctionBegin; 1911 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */ 1912 for (i=0; i<n; i++) { 1913 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1914 if (!flg) { 1915 ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr); 1916 } 1917 } 1918 PetscFunctionReturn(0); 1919 } 1920 1921 #undef __FUNCT__ 1922 #define __FUNCT__ "MatShift_MPISBAIJ" 1923 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a) 1924 { 1925 PetscErrorCode ierr; 1926 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data; 1927 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data; 1928 1929 PetscFunctionBegin; 1930 if (!Y->preallocated) { 1931 ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 1932 } else if (!aij->nz) { 1933 PetscInt nonew = aij->nonew; 1934 ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 1935 aij->nonew = nonew; 1936 } 1937 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1938 PetscFunctionReturn(0); 1939 } 1940 1941 #undef __FUNCT__ 1942 #define __FUNCT__ "MatMissingDiagonal_MPISBAIJ" 1943 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1944 { 1945 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1946 PetscErrorCode ierr; 1947 1948 PetscFunctionBegin; 1949 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 1950 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 1951 if (d) { 1952 PetscInt rstart; 1953 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 1954 *d += rstart/A->rmap->bs; 1955 1956 } 1957 PetscFunctionReturn(0); 1958 } 1959 1960 #undef __FUNCT__ 1961 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1962 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1963 { 1964 PetscFunctionBegin; 1965 *a = ((Mat_MPISBAIJ*)A->data)->A; 1966 PetscFunctionReturn(0); 1967 } 1968 1969 /* -------------------------------------------------------------------*/ 1970 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1971 MatGetRow_MPISBAIJ, 1972 MatRestoreRow_MPISBAIJ, 1973 MatMult_MPISBAIJ, 1974 /* 4*/ MatMultAdd_MPISBAIJ, 1975 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1976 MatMultAdd_MPISBAIJ, 1977 0, 1978 0, 1979 0, 1980 /* 10*/ 0, 1981 0, 1982 0, 1983 MatSOR_MPISBAIJ, 1984 MatTranspose_MPISBAIJ, 1985 /* 15*/ MatGetInfo_MPISBAIJ, 1986 MatEqual_MPISBAIJ, 1987 MatGetDiagonal_MPISBAIJ, 1988 MatDiagonalScale_MPISBAIJ, 1989 MatNorm_MPISBAIJ, 1990 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1991 MatAssemblyEnd_MPISBAIJ, 1992 MatSetOption_MPISBAIJ, 1993 MatZeroEntries_MPISBAIJ, 1994 /* 24*/ 0, 1995 0, 1996 0, 1997 0, 1998 0, 1999 /* 29*/ MatSetUp_MPISBAIJ, 2000 0, 2001 0, 2002 MatGetDiagonalBlock_MPISBAIJ, 2003 0, 2004 /* 34*/ MatDuplicate_MPISBAIJ, 2005 0, 2006 0, 2007 0, 2008 0, 2009 /* 39*/ MatAXPY_MPISBAIJ, 2010 MatGetSubMatrices_MPISBAIJ, 2011 MatIncreaseOverlap_MPISBAIJ, 2012 MatGetValues_MPISBAIJ, 2013 MatCopy_MPISBAIJ, 2014 /* 44*/ 0, 2015 MatScale_MPISBAIJ, 2016 MatShift_MPISBAIJ, 2017 0, 2018 0, 2019 /* 49*/ 0, 2020 0, 2021 0, 2022 0, 2023 0, 2024 /* 54*/ 0, 2025 0, 2026 MatSetUnfactored_MPISBAIJ, 2027 0, 2028 MatSetValuesBlocked_MPISBAIJ, 2029 /* 59*/ MatGetSubMatrix_MPISBAIJ, 2030 0, 2031 0, 2032 0, 2033 0, 2034 /* 64*/ 0, 2035 0, 2036 0, 2037 0, 2038 0, 2039 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 2040 0, 2041 0, 2042 0, 2043 0, 2044 /* 74*/ 0, 2045 0, 2046 0, 2047 0, 2048 0, 2049 /* 79*/ 0, 2050 0, 2051 0, 2052 0, 2053 MatLoad_MPISBAIJ, 2054 /* 84*/ 0, 2055 0, 2056 0, 2057 0, 2058 0, 2059 /* 89*/ 0, 2060 0, 2061 0, 2062 0, 2063 0, 2064 /* 94*/ 0, 2065 0, 2066 0, 2067 0, 2068 0, 2069 /* 99*/ 0, 2070 0, 2071 0, 2072 0, 2073 0, 2074 /*104*/ 0, 2075 MatRealPart_MPISBAIJ, 2076 MatImaginaryPart_MPISBAIJ, 2077 MatGetRowUpperTriangular_MPISBAIJ, 2078 MatRestoreRowUpperTriangular_MPISBAIJ, 2079 /*109*/ 0, 2080 0, 2081 0, 2082 0, 2083 MatMissingDiagonal_MPISBAIJ, 2084 /*114*/ 0, 2085 0, 2086 0, 2087 0, 2088 0, 2089 /*119*/ 0, 2090 0, 2091 0, 2092 0, 2093 0, 2094 /*124*/ 0, 2095 0, 2096 0, 2097 0, 2098 0, 2099 /*129*/ 0, 2100 0, 2101 0, 2102 0, 2103 0, 2104 /*134*/ 0, 2105 0, 2106 0, 2107 0, 2108 0, 2109 /*139*/ MatSetBlockSizes_Default, 2110 0, 2111 0, 2112 0, 2113 0, 2114 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 2115 }; 2116 2117 #undef __FUNCT__ 2118 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 2119 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2120 { 2121 Mat_MPISBAIJ *b; 2122 PetscErrorCode ierr; 2123 PetscInt i,mbs,Mbs; 2124 2125 PetscFunctionBegin; 2126 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2127 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2128 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2129 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2130 2131 b = (Mat_MPISBAIJ*)B->data; 2132 mbs = B->rmap->n/bs; 2133 Mbs = B->rmap->N/bs; 2134 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 2135 2136 B->rmap->bs = bs; 2137 b->bs2 = bs*bs; 2138 b->mbs = mbs; 2139 b->Mbs = Mbs; 2140 b->nbs = B->cmap->n/bs; 2141 b->Nbs = B->cmap->N/bs; 2142 2143 for (i=0; i<=b->size; i++) { 2144 b->rangebs[i] = B->rmap->range[i]/bs; 2145 } 2146 b->rstartbs = B->rmap->rstart/bs; 2147 b->rendbs = B->rmap->rend/bs; 2148 2149 b->cstartbs = B->cmap->rstart/bs; 2150 b->cendbs = B->cmap->rend/bs; 2151 2152 #if defined(PETSC_USE_CTABLE) 2153 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2154 #else 2155 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2156 #endif 2157 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2158 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2159 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2160 ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr); 2161 ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr); 2162 ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr); 2163 ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr); 2164 ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr); 2165 ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr); 2166 2167 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2168 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2169 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2170 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2171 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2172 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2173 2174 if (!B->preallocated) { 2175 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2176 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2177 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 2178 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2179 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2180 } 2181 2182 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2183 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2184 2185 B->preallocated = PETSC_TRUE; 2186 B->was_assembled = PETSC_FALSE; 2187 B->assembled = PETSC_FALSE; 2188 PetscFunctionReturn(0); 2189 } 2190 2191 #undef __FUNCT__ 2192 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ" 2193 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2194 { 2195 PetscInt m,rstart,cstart,cend; 2196 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2197 const PetscInt *JJ =0; 2198 PetscScalar *values=0; 2199 PetscErrorCode ierr; 2200 2201 PetscFunctionBegin; 2202 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2203 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2204 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2205 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2206 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2207 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2208 m = B->rmap->n/bs; 2209 rstart = B->rmap->rstart/bs; 2210 cstart = B->cmap->rstart/bs; 2211 cend = B->cmap->rend/bs; 2212 2213 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2214 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2215 for (i=0; i<m; i++) { 2216 nz = ii[i+1] - ii[i]; 2217 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2218 nz_max = PetscMax(nz_max,nz); 2219 JJ = jj + ii[i]; 2220 for (j=0; j<nz; j++) { 2221 if (*JJ >= cstart) break; 2222 JJ++; 2223 } 2224 d = 0; 2225 for (; j<nz; j++) { 2226 if (*JJ++ >= cend) break; 2227 d++; 2228 } 2229 d_nnz[i] = d; 2230 o_nnz[i] = nz - d; 2231 } 2232 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2233 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2234 2235 values = (PetscScalar*)V; 2236 if (!values) { 2237 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2238 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2239 } 2240 for (i=0; i<m; i++) { 2241 PetscInt row = i + rstart; 2242 PetscInt ncols = ii[i+1] - ii[i]; 2243 const PetscInt *icols = jj + ii[i]; 2244 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2245 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2246 } 2247 2248 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2249 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2250 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2251 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2252 PetscFunctionReturn(0); 2253 } 2254 2255 /*MC 2256 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2257 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2258 the matrix is stored. 2259 2260 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2261 can call MatSetOption(Mat, MAT_HERMITIAN); 2262 2263 Options Database Keys: 2264 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 2265 2266 Level: beginner 2267 2268 .seealso: MatCreateMPISBAIJ 2269 M*/ 2270 2271 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*); 2272 2273 #undef __FUNCT__ 2274 #define __FUNCT__ "MatCreate_MPISBAIJ" 2275 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2276 { 2277 Mat_MPISBAIJ *b; 2278 PetscErrorCode ierr; 2279 PetscBool flg = PETSC_FALSE; 2280 2281 PetscFunctionBegin; 2282 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2283 B->data = (void*)b; 2284 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2285 2286 B->ops->destroy = MatDestroy_MPISBAIJ; 2287 B->ops->view = MatView_MPISBAIJ; 2288 B->assembled = PETSC_FALSE; 2289 B->insertmode = NOT_SET_VALUES; 2290 2291 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2292 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2293 2294 /* build local table of row and column ownerships */ 2295 ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 2296 2297 /* build cache for off array entries formed */ 2298 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2299 2300 b->donotstash = PETSC_FALSE; 2301 b->colmap = NULL; 2302 b->garray = NULL; 2303 b->roworiented = PETSC_TRUE; 2304 2305 /* stuff used in block assembly */ 2306 b->barray = 0; 2307 2308 /* stuff used for matrix vector multiply */ 2309 b->lvec = 0; 2310 b->Mvctx = 0; 2311 b->slvec0 = 0; 2312 b->slvec0b = 0; 2313 b->slvec1 = 0; 2314 b->slvec1a = 0; 2315 b->slvec1b = 0; 2316 b->sMvctx = 0; 2317 2318 /* stuff for MatGetRow() */ 2319 b->rowindices = 0; 2320 b->rowvalues = 0; 2321 b->getrowactive = PETSC_FALSE; 2322 2323 /* hash table stuff */ 2324 b->ht = 0; 2325 b->hd = 0; 2326 b->ht_size = 0; 2327 b->ht_flag = PETSC_FALSE; 2328 b->ht_fact = 0; 2329 b->ht_total_ct = 0; 2330 b->ht_insert_ct = 0; 2331 2332 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 2333 b->ijonly = PETSC_FALSE; 2334 2335 b->in_loc = 0; 2336 b->v_loc = 0; 2337 b->n_loc = 0; 2338 2339 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 2340 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 2341 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 2342 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 2343 #if defined(PETSC_HAVE_ELEMENTAL) 2344 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 2345 #endif 2346 2347 B->symmetric = PETSC_TRUE; 2348 B->structurally_symmetric = PETSC_TRUE; 2349 B->symmetric_set = PETSC_TRUE; 2350 B->structurally_symmetric_set = PETSC_TRUE; 2351 2352 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 2353 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 2354 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 2355 if (flg) { 2356 PetscReal fact = 1.39; 2357 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2358 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 2359 if (fact <= 1.0) fact = 1.39; 2360 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2361 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2362 } 2363 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2364 PetscFunctionReturn(0); 2365 } 2366 2367 /*MC 2368 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2369 2370 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 2371 and MATMPISBAIJ otherwise. 2372 2373 Options Database Keys: 2374 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2375 2376 Level: beginner 2377 2378 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 2379 M*/ 2380 2381 #undef __FUNCT__ 2382 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 2383 /*@C 2384 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2385 the user should preallocate the matrix storage by setting the parameters 2386 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2387 performance can be increased by more than a factor of 50. 2388 2389 Collective on Mat 2390 2391 Input Parameters: 2392 + B - the matrix 2393 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2394 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2395 . d_nz - number of block nonzeros per block row in diagonal portion of local 2396 submatrix (same for all local rows) 2397 . d_nnz - array containing the number of block nonzeros in the various block rows 2398 in the upper triangular and diagonal part of the in diagonal portion of the local 2399 (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 2400 for the diagonal entry and set a value even if it is zero. 2401 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2402 submatrix (same for all local rows). 2403 - o_nnz - array containing the number of nonzeros in the various block rows of the 2404 off-diagonal portion of the local submatrix that is right of the diagonal 2405 (possibly different for each block row) or NULL. 2406 2407 2408 Options Database Keys: 2409 . -mat_no_unroll - uses code that does not unroll the loops in the 2410 block calculations (much slower) 2411 . -mat_block_size - size of the blocks to use 2412 2413 Notes: 2414 2415 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2416 than it must be used on all processors that share the object for that argument. 2417 2418 If the *_nnz parameter is given then the *_nz parameter is ignored 2419 2420 Storage Information: 2421 For a square global matrix we define each processor's diagonal portion 2422 to be its local rows and the corresponding columns (a square submatrix); 2423 each processor's off-diagonal portion encompasses the remainder of the 2424 local matrix (a rectangular submatrix). 2425 2426 The user can specify preallocated storage for the diagonal part of 2427 the local submatrix with either d_nz or d_nnz (not both). Set 2428 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2429 memory allocation. Likewise, specify preallocated storage for the 2430 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2431 2432 You can call MatGetInfo() to get information on how effective the preallocation was; 2433 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2434 You can also run with the option -info and look for messages with the string 2435 malloc in them to see if additional memory allocation was needed. 2436 2437 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2438 the figure below we depict these three local rows and all columns (0-11). 2439 2440 .vb 2441 0 1 2 3 4 5 6 7 8 9 10 11 2442 -------------------------- 2443 row 3 |. . . d d d o o o o o o 2444 row 4 |. . . d d d o o o o o o 2445 row 5 |. . . d d d o o o o o o 2446 -------------------------- 2447 .ve 2448 2449 Thus, any entries in the d locations are stored in the d (diagonal) 2450 submatrix, and any entries in the o locations are stored in the 2451 o (off-diagonal) submatrix. Note that the d matrix is stored in 2452 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2453 2454 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2455 plus the diagonal part of the d matrix, 2456 and o_nz should indicate the number of block nonzeros per row in the o matrix 2457 2458 In general, for PDE problems in which most nonzeros are near the diagonal, 2459 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2460 or you will get TERRIBLE performance; see the users' manual chapter on 2461 matrices. 2462 2463 Level: intermediate 2464 2465 .keywords: matrix, block, aij, compressed row, sparse, parallel 2466 2467 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 2468 @*/ 2469 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2470 { 2471 PetscErrorCode ierr; 2472 2473 PetscFunctionBegin; 2474 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2475 PetscValidType(B,1); 2476 PetscValidLogicalCollectiveInt(B,bs,2); 2477 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 2478 PetscFunctionReturn(0); 2479 } 2480 2481 #undef __FUNCT__ 2482 #define __FUNCT__ "MatCreateSBAIJ" 2483 /*@C 2484 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2485 (block compressed row). For good matrix assembly performance 2486 the user should preallocate the matrix storage by setting the parameters 2487 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2488 performance can be increased by more than a factor of 50. 2489 2490 Collective on MPI_Comm 2491 2492 Input Parameters: 2493 + comm - MPI communicator 2494 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2495 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2496 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2497 This value should be the same as the local size used in creating the 2498 y vector for the matrix-vector product y = Ax. 2499 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2500 This value should be the same as the local size used in creating the 2501 x vector for the matrix-vector product y = Ax. 2502 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2503 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2504 . d_nz - number of block nonzeros per block row in diagonal portion of local 2505 submatrix (same for all local rows) 2506 . d_nnz - array containing the number of block nonzeros in the various block rows 2507 in the upper triangular portion of the in diagonal portion of the local 2508 (possibly different for each block block row) or NULL. 2509 If you plan to factor the matrix you must leave room for the diagonal entry and 2510 set its value even if it is zero. 2511 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2512 submatrix (same for all local rows). 2513 - o_nnz - array containing the number of nonzeros in the various block rows of the 2514 off-diagonal portion of the local submatrix (possibly different for 2515 each block row) or NULL. 2516 2517 Output Parameter: 2518 . A - the matrix 2519 2520 Options Database Keys: 2521 . -mat_no_unroll - uses code that does not unroll the loops in the 2522 block calculations (much slower) 2523 . -mat_block_size - size of the blocks to use 2524 . -mat_mpi - use the parallel matrix data structures even on one processor 2525 (defaults to using SeqBAIJ format on one processor) 2526 2527 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2528 MatXXXXSetPreallocation() paradgm instead of this routine directly. 2529 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2530 2531 Notes: 2532 The number of rows and columns must be divisible by blocksize. 2533 This matrix type does not support complex Hermitian operation. 2534 2535 The user MUST specify either the local or global matrix dimensions 2536 (possibly both). 2537 2538 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2539 than it must be used on all processors that share the object for that argument. 2540 2541 If the *_nnz parameter is given then the *_nz parameter is ignored 2542 2543 Storage Information: 2544 For a square global matrix we define each processor's diagonal portion 2545 to be its local rows and the corresponding columns (a square submatrix); 2546 each processor's off-diagonal portion encompasses the remainder of the 2547 local matrix (a rectangular submatrix). 2548 2549 The user can specify preallocated storage for the diagonal part of 2550 the local submatrix with either d_nz or d_nnz (not both). Set 2551 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2552 memory allocation. Likewise, specify preallocated storage for the 2553 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2554 2555 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2556 the figure below we depict these three local rows and all columns (0-11). 2557 2558 .vb 2559 0 1 2 3 4 5 6 7 8 9 10 11 2560 -------------------------- 2561 row 3 |. . . d d d o o o o o o 2562 row 4 |. . . d d d o o o o o o 2563 row 5 |. . . d d d o o o o o o 2564 -------------------------- 2565 .ve 2566 2567 Thus, any entries in the d locations are stored in the d (diagonal) 2568 submatrix, and any entries in the o locations are stored in the 2569 o (off-diagonal) submatrix. Note that the d matrix is stored in 2570 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2571 2572 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2573 plus the diagonal part of the d matrix, 2574 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2575 In general, for PDE problems in which most nonzeros are near the diagonal, 2576 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2577 or you will get TERRIBLE performance; see the users' manual chapter on 2578 matrices. 2579 2580 Level: intermediate 2581 2582 .keywords: matrix, block, aij, compressed row, sparse, parallel 2583 2584 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2585 @*/ 2586 2587 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2588 { 2589 PetscErrorCode ierr; 2590 PetscMPIInt size; 2591 2592 PetscFunctionBegin; 2593 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2594 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2595 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2596 if (size > 1) { 2597 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2598 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2599 } else { 2600 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2601 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2602 } 2603 PetscFunctionReturn(0); 2604 } 2605 2606 2607 #undef __FUNCT__ 2608 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2609 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2610 { 2611 Mat mat; 2612 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2613 PetscErrorCode ierr; 2614 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2615 PetscScalar *array; 2616 2617 PetscFunctionBegin; 2618 *newmat = 0; 2619 2620 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2621 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2622 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2623 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2624 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2625 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2626 2627 mat->factortype = matin->factortype; 2628 mat->preallocated = PETSC_TRUE; 2629 mat->assembled = PETSC_TRUE; 2630 mat->insertmode = NOT_SET_VALUES; 2631 2632 a = (Mat_MPISBAIJ*)mat->data; 2633 a->bs2 = oldmat->bs2; 2634 a->mbs = oldmat->mbs; 2635 a->nbs = oldmat->nbs; 2636 a->Mbs = oldmat->Mbs; 2637 a->Nbs = oldmat->Nbs; 2638 2639 2640 a->size = oldmat->size; 2641 a->rank = oldmat->rank; 2642 a->donotstash = oldmat->donotstash; 2643 a->roworiented = oldmat->roworiented; 2644 a->rowindices = 0; 2645 a->rowvalues = 0; 2646 a->getrowactive = PETSC_FALSE; 2647 a->barray = 0; 2648 a->rstartbs = oldmat->rstartbs; 2649 a->rendbs = oldmat->rendbs; 2650 a->cstartbs = oldmat->cstartbs; 2651 a->cendbs = oldmat->cendbs; 2652 2653 /* hash table stuff */ 2654 a->ht = 0; 2655 a->hd = 0; 2656 a->ht_size = 0; 2657 a->ht_flag = oldmat->ht_flag; 2658 a->ht_fact = oldmat->ht_fact; 2659 a->ht_total_ct = 0; 2660 a->ht_insert_ct = 0; 2661 2662 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2663 if (oldmat->colmap) { 2664 #if defined(PETSC_USE_CTABLE) 2665 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2666 #else 2667 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 2668 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2669 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2670 #endif 2671 } else a->colmap = 0; 2672 2673 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2674 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2675 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2676 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2677 } else a->garray = 0; 2678 2679 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2680 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2681 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2682 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2683 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2684 2685 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2686 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2687 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2688 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2689 2690 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2691 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2692 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2693 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2694 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2695 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2696 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2697 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2698 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2699 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2700 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 2701 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 2702 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2703 2704 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2705 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2706 a->sMvctx = oldmat->sMvctx; 2707 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 2708 2709 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2710 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2711 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2712 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2713 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2714 *newmat = mat; 2715 PetscFunctionReturn(0); 2716 } 2717 2718 #undef __FUNCT__ 2719 #define __FUNCT__ "MatLoad_MPISBAIJ" 2720 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2721 { 2722 PetscErrorCode ierr; 2723 PetscInt i,nz,j,rstart,rend; 2724 PetscScalar *vals,*buf; 2725 MPI_Comm comm; 2726 MPI_Status status; 2727 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2728 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2729 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2730 PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows; 2731 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2732 PetscInt dcount,kmax,k,nzcount,tmp; 2733 int fd; 2734 2735 PetscFunctionBegin; 2736 /* force binary viewer to load .info file if it has not yet done so */ 2737 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2738 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2739 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2740 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2741 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2742 if (bs < 0) bs = 1; 2743 2744 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2745 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2746 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2747 if (!rank) { 2748 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2749 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2750 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2751 } 2752 2753 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2754 M = header[1]; 2755 N = header[2]; 2756 2757 /* If global sizes are set, check if they are consistent with that given in the file */ 2758 if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M); 2759 if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N); 2760 2761 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2762 2763 /* 2764 This code adds extra rows to make sure the number of rows is 2765 divisible by the blocksize 2766 */ 2767 Mbs = M/bs; 2768 extra_rows = bs - M + bs*(Mbs); 2769 if (extra_rows == bs) extra_rows = 0; 2770 else Mbs++; 2771 if (extra_rows &&!rank) { 2772 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2773 } 2774 2775 /* determine ownership of all rows */ 2776 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2777 mbs = Mbs/size + ((Mbs % size) > rank); 2778 m = mbs*bs; 2779 } else { /* User Set */ 2780 m = newmat->rmap->n; 2781 mbs = m/bs; 2782 } 2783 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 2784 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2785 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2786 rowners[0] = 0; 2787 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2788 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2789 rstart = rowners[rank]; 2790 rend = rowners[rank+1]; 2791 2792 /* distribute row lengths to all processors */ 2793 ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 2794 if (!rank) { 2795 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 2796 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2797 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2798 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 2799 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2800 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2801 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2802 } else { 2803 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2804 } 2805 2806 if (!rank) { /* procs[0] */ 2807 /* calculate the number of nonzeros on each processor */ 2808 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 2809 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2810 for (i=0; i<size; i++) { 2811 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2812 procsnz[i] += rowlengths[j]; 2813 } 2814 } 2815 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2816 2817 /* determine max buffer needed and allocate it */ 2818 maxnz = 0; 2819 for (i=0; i<size; i++) { 2820 maxnz = PetscMax(maxnz,procsnz[i]); 2821 } 2822 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2823 2824 /* read in my part of the matrix column indices */ 2825 nz = procsnz[0]; 2826 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2827 mycols = ibuf; 2828 if (size == 1) nz -= extra_rows; 2829 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2830 if (size == 1) { 2831 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 2832 } 2833 2834 /* read in every ones (except the last) and ship off */ 2835 for (i=1; i<size-1; i++) { 2836 nz = procsnz[i]; 2837 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2838 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2839 } 2840 /* read in the stuff for the last proc */ 2841 if (size != 1) { 2842 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2843 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2844 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2845 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2846 } 2847 ierr = PetscFree(cols);CHKERRQ(ierr); 2848 } else { /* procs[i], i>0 */ 2849 /* determine buffer space needed for message */ 2850 nz = 0; 2851 for (i=0; i<m; i++) nz += locrowlens[i]; 2852 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2853 mycols = ibuf; 2854 /* receive message of column indices*/ 2855 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2856 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2857 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2858 } 2859 2860 /* loop over local rows, determining number of off diagonal entries */ 2861 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 2862 ierr = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 2863 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2864 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2865 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2866 rowcount = 0; 2867 nzcount = 0; 2868 for (i=0; i<mbs; i++) { 2869 dcount = 0; 2870 odcount = 0; 2871 for (j=0; j<bs; j++) { 2872 kmax = locrowlens[rowcount]; 2873 for (k=0; k<kmax; k++) { 2874 tmp = mycols[nzcount++]/bs; /* block col. index */ 2875 if (!mask[tmp]) { 2876 mask[tmp] = 1; 2877 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2878 else masked1[dcount++] = tmp; /* entry in diag portion */ 2879 } 2880 } 2881 rowcount++; 2882 } 2883 2884 dlens[i] = dcount; /* d_nzz[i] */ 2885 odlens[i] = odcount; /* o_nzz[i] */ 2886 2887 /* zero out the mask elements we set */ 2888 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2889 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2890 } 2891 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2892 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2893 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2894 2895 if (!rank) { 2896 ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 2897 /* read in my part of the matrix numerical values */ 2898 nz = procsnz[0]; 2899 vals = buf; 2900 mycols = ibuf; 2901 if (size == 1) nz -= extra_rows; 2902 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2903 if (size == 1) { 2904 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 2905 } 2906 2907 /* insert into matrix */ 2908 jj = rstart*bs; 2909 for (i=0; i<m; i++) { 2910 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2911 mycols += locrowlens[i]; 2912 vals += locrowlens[i]; 2913 jj++; 2914 } 2915 2916 /* read in other processors (except the last one) and ship out */ 2917 for (i=1; i<size-1; i++) { 2918 nz = procsnz[i]; 2919 vals = buf; 2920 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2921 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2922 } 2923 /* the last proc */ 2924 if (size != 1) { 2925 nz = procsnz[i] - extra_rows; 2926 vals = buf; 2927 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2928 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2929 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2930 } 2931 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2932 2933 } else { 2934 /* receive numeric values */ 2935 ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 2936 2937 /* receive message of values*/ 2938 vals = buf; 2939 mycols = ibuf; 2940 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2941 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2942 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2943 2944 /* insert into matrix */ 2945 jj = rstart*bs; 2946 for (i=0; i<m; i++) { 2947 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2948 mycols += locrowlens[i]; 2949 vals += locrowlens[i]; 2950 jj++; 2951 } 2952 } 2953 2954 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2955 ierr = PetscFree(buf);CHKERRQ(ierr); 2956 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2957 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2958 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2959 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2960 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2961 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2962 PetscFunctionReturn(0); 2963 } 2964 2965 #undef __FUNCT__ 2966 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2967 /*XXXXX@ 2968 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2969 2970 Input Parameters: 2971 . mat - the matrix 2972 . fact - factor 2973 2974 Not Collective on Mat, each process can have a different hash factor 2975 2976 Level: advanced 2977 2978 Notes: 2979 This can also be set by the command line option: -mat_use_hash_table fact 2980 2981 .keywords: matrix, hashtable, factor, HT 2982 2983 .seealso: MatSetOption() 2984 @XXXXX*/ 2985 2986 2987 #undef __FUNCT__ 2988 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2989 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2990 { 2991 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2992 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2993 PetscReal atmp; 2994 PetscReal *work,*svalues,*rvalues; 2995 PetscErrorCode ierr; 2996 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2997 PetscMPIInt rank,size; 2998 PetscInt *rowners_bs,dest,count,source; 2999 PetscScalar *va; 3000 MatScalar *ba; 3001 MPI_Status stat; 3002 3003 PetscFunctionBegin; 3004 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 3005 ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 3006 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 3007 3008 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3009 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3010 3011 bs = A->rmap->bs; 3012 mbs = a->mbs; 3013 Mbs = a->Mbs; 3014 ba = b->a; 3015 bi = b->i; 3016 bj = b->j; 3017 3018 /* find ownerships */ 3019 rowners_bs = A->rmap->range; 3020 3021 /* each proc creates an array to be distributed */ 3022 ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr); 3023 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 3024 3025 /* row_max for B */ 3026 if (rank != size-1) { 3027 for (i=0; i<mbs; i++) { 3028 ncols = bi[1] - bi[0]; bi++; 3029 brow = bs*i; 3030 for (j=0; j<ncols; j++) { 3031 bcol = bs*(*bj); 3032 for (kcol=0; kcol<bs; kcol++) { 3033 col = bcol + kcol; /* local col index */ 3034 col += rowners_bs[rank+1]; /* global col index */ 3035 for (krow=0; krow<bs; krow++) { 3036 atmp = PetscAbsScalar(*ba); ba++; 3037 row = brow + krow; /* local row index */ 3038 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 3039 if (work[col] < atmp) work[col] = atmp; 3040 } 3041 } 3042 bj++; 3043 } 3044 } 3045 3046 /* send values to its owners */ 3047 for (dest=rank+1; dest<size; dest++) { 3048 svalues = work + rowners_bs[dest]; 3049 count = rowners_bs[dest+1]-rowners_bs[dest]; 3050 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 3051 } 3052 } 3053 3054 /* receive values */ 3055 if (rank) { 3056 rvalues = work; 3057 count = rowners_bs[rank+1]-rowners_bs[rank]; 3058 for (source=0; source<rank; source++) { 3059 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 3060 /* process values */ 3061 for (i=0; i<count; i++) { 3062 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 3063 } 3064 } 3065 } 3066 3067 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 3068 ierr = PetscFree(work);CHKERRQ(ierr); 3069 PetscFunctionReturn(0); 3070 } 3071 3072 #undef __FUNCT__ 3073 #define __FUNCT__ "MatSOR_MPISBAIJ" 3074 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 3075 { 3076 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 3077 PetscErrorCode ierr; 3078 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 3079 PetscScalar *x,*ptr,*from; 3080 Vec bb1; 3081 const PetscScalar *b; 3082 3083 PetscFunctionBegin; 3084 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 3085 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 3086 3087 if (flag == SOR_APPLY_UPPER) { 3088 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3089 PetscFunctionReturn(0); 3090 } 3091 3092 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 3093 if (flag & SOR_ZERO_INITIAL_GUESS) { 3094 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 3095 its--; 3096 } 3097 3098 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 3099 while (its--) { 3100 3101 /* lower triangular part: slvec0b = - B^T*xx */ 3102 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 3103 3104 /* copy xx into slvec0a */ 3105 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 3106 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3107 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 3108 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 3109 3110 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 3111 3112 /* copy bb into slvec1a */ 3113 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 3114 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3115 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 3116 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 3117 3118 /* set slvec1b = 0 */ 3119 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 3120 3121 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3122 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3123 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3124 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3125 3126 /* upper triangular part: bb1 = bb1 - B*x */ 3127 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 3128 3129 /* local diagonal sweep */ 3130 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 3131 } 3132 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 3133 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 3134 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3135 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 3136 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3137 } else if (flag & SOR_EISENSTAT) { 3138 Vec xx1; 3139 PetscBool hasop; 3140 const PetscScalar *diag; 3141 PetscScalar *sl,scale = (omega - 2.0)/omega; 3142 PetscInt i,n; 3143 3144 if (!mat->xx1) { 3145 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 3146 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 3147 } 3148 xx1 = mat->xx1; 3149 bb1 = mat->bb1; 3150 3151 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 3152 3153 if (!mat->diag) { 3154 /* this is wrong for same matrix with new nonzero values */ 3155 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 3156 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 3157 } 3158 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 3159 3160 if (hasop) { 3161 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 3162 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 3163 } else { 3164 /* 3165 These two lines are replaced by code that may be a bit faster for a good compiler 3166 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 3167 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 3168 */ 3169 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 3170 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 3171 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3172 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3173 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 3174 if (omega == 1.0) { 3175 for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 3176 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 3177 } else { 3178 for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 3179 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 3180 } 3181 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 3182 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 3183 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3184 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3185 } 3186 3187 /* multiply off-diagonal portion of matrix */ 3188 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 3189 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 3190 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 3191 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3192 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 3193 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 3194 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3195 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3196 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3197 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 3198 3199 /* local sweep */ 3200 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 3201 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 3202 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 3203 PetscFunctionReturn(0); 3204 } 3205 3206 #undef __FUNCT__ 3207 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 3208 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 3209 { 3210 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 3211 PetscErrorCode ierr; 3212 Vec lvec1,bb1; 3213 3214 PetscFunctionBegin; 3215 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 3216 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 3217 3218 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 3219 if (flag & SOR_ZERO_INITIAL_GUESS) { 3220 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 3221 its--; 3222 } 3223 3224 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 3225 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 3226 while (its--) { 3227 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3228 3229 /* lower diagonal part: bb1 = bb - B^T*xx */ 3230 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 3231 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 3232 3233 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3234 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 3235 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3236 3237 /* upper diagonal part: bb1 = bb1 - B*x */ 3238 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 3239 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 3240 3241 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3242 3243 /* diagonal sweep */ 3244 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 3245 } 3246 ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 3247 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 3248 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 3249 PetscFunctionReturn(0); 3250 } 3251 3252 #undef __FUNCT__ 3253 #define __FUNCT__ "MatCreateMPISBAIJWithArrays" 3254 /*@ 3255 MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard 3256 CSR format the local rows. 3257 3258 Collective on MPI_Comm 3259 3260 Input Parameters: 3261 + comm - MPI communicator 3262 . bs - the block size, only a block size of 1 is supported 3263 . m - number of local rows (Cannot be PETSC_DECIDE) 3264 . n - This value should be the same as the local size used in creating the 3265 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3266 calculated if N is given) For square matrices n is almost always m. 3267 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3268 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3269 . i - row indices 3270 . j - column indices 3271 - a - matrix values 3272 3273 Output Parameter: 3274 . mat - the matrix 3275 3276 Level: intermediate 3277 3278 Notes: 3279 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3280 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3281 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3282 3283 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3284 3285 .keywords: matrix, aij, compressed row, sparse, parallel 3286 3287 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3288 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3289 @*/ 3290 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3291 { 3292 PetscErrorCode ierr; 3293 3294 3295 PetscFunctionBegin; 3296 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3297 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3298 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3299 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3300 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 3301 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 3302 PetscFunctionReturn(0); 3303 } 3304 3305 3306 #undef __FUNCT__ 3307 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR" 3308 /*@C 3309 MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 3310 (the default parallel PETSc format). 3311 3312 Collective on MPI_Comm 3313 3314 Input Parameters: 3315 + B - the matrix 3316 . bs - the block size 3317 . i - the indices into j for the start of each local row (starts with zero) 3318 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3319 - v - optional values in the matrix 3320 3321 Level: developer 3322 3323 .keywords: matrix, aij, compressed row, sparse, parallel 3324 3325 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 3326 @*/ 3327 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3328 { 3329 PetscErrorCode ierr; 3330 3331 PetscFunctionBegin; 3332 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3333 PetscFunctionReturn(0); 3334 } 3335 3336 #undef __FUNCT__ 3337 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ" 3338 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3339 { 3340 PetscErrorCode ierr; 3341 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 3342 PetscInt *indx; 3343 PetscScalar *values; 3344 3345 PetscFunctionBegin; 3346 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3347 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3348 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data; 3349 PetscInt *dnz,*onz,sum,bs,cbs,mbs,Nbs; 3350 PetscInt *bindx,rmax=a->rmax,j; 3351 3352 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3353 mbs = m/bs; Nbs = N/cbs; 3354 if (n == PETSC_DECIDE) { 3355 ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr); 3356 } 3357 /* Check sum(n) = Nbs */ 3358 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3359 if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs); 3360 3361 ierr = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3362 rstart -= mbs; 3363 3364 ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 3365 ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr); 3366 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 3367 for (i=0; i<mbs; i++) { 3368 ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 3369 nnz = nnz/bs; 3370 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 3371 ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 3372 ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 3373 } 3374 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 3375 ierr = PetscFree(bindx);CHKERRQ(ierr); 3376 3377 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3378 ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3379 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3380 ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr); 3381 ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 3382 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3383 } 3384 3385 /* numeric phase */ 3386 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3387 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3388 3389 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 3390 for (i=0; i<m; i++) { 3391 ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3392 Ii = i + rstart; 3393 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3394 ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3395 } 3396 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 3397 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3398 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3399 PetscFunctionReturn(0); 3400 } 3401