#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    A->nonzerostate++; \
    a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    B->nonzerostate++; \
    b_noinsert:; \
    bilen[brow] = nrow; \
  }
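/*
   The two macros above share one insertion scheme: a short binary search narrows the
   block-column window for the current block row, a linear scan then either finds an
   existing bs x bs block (overwrite or add in place) or locates the insertion point;
   only when the block is missing do they reallocate and shift the tail of the row by
   one block. They differ only in which local submatrix (A or B) they address.
*/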
/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else { /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else { /* off processor entry */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
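/*
   A hedged caller-side sketch of the routine above (i, j and the value are illustrative,
   not part of this file): only the upper triangle needs to be set, and off-process rows
   are stashed until assembly.

     PetscInt    i = rstart, j = i + 1;   any j >= i lands in A or B above
     PetscScalar v = 1.0;
     ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/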
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ_Inlined"
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt          *aj  =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp    = aj + ai[row];
  ap    = aa + bs2*ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;

  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ = *value++;
            }
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
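/*
   In the inlined routine above the bs x bs blocks are stored column-major (entry (r,c)
   lives at offset bs*c + r, as the bs*cidx + ridx indexing in the macros shows). The
   roworiented branches therefore write the caller's row-major block with stride bs
   (jj = ii, ii+bs, ...), while the column-oriented branches copy contiguously.
*/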
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined"
/*
    This routine is exactly duplicated in mpibaij.c
*/
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj  =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2*ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  low   = 0;
  high  = nrow;
  value = v;
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
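/*
   Two inlined setters are needed because the two local submatrices use different
   storage: the diagonal part A is SeqSBAIJ and keeps only upper-triangular blocks
   (hence the col < row guard above), while the off-diagonal part B is SeqBAIJ and
   keeps full blocks, so its setter has no lower-triangular guard.
*/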
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
/*
    This routine could be optimized by removing the need for the block copy below and passing stride information
  to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
*/
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     = baij->barray;
  PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
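/*
   A hedged usage sketch for the blocked setter above (brow, bcol and vals are
   illustrative): indices are in block units, brow <= bcol, and vals holds one dense
   bs x bs block, row-oriented by default.

     PetscInt    brow, bcol;       block indices with brow <= bcol
     PetscScalar vals[4];          one dense block for bs = 2
     ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,vals,ADD_VALUES);CHKERRQ(ierr);
*/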
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
      ierr    = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr    = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;            /* square of norm(B) */
      ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
      ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
      ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
        ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
        ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
      }
      ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
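/*
   Assembly protocol: MatAssemblyBegin() above ships both the scalar stash and the
   block stash to their owning ranks; MatAssemblyEnd() below drains the incoming
   messages, replays them through MatSetValues(Blocked)_MPISBAIJ(), and then assembles
   the local A (diagonal, SBAIJ) and B (off-diagonal, BAIJ) parts.
*/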
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;
  MatScalar      *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       clear the roworiented flags, and after MatSetValues() restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
extern PetscErrorCode MatView_SeqSBAIJ(Mat,PetscViewer);
extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
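/*
   The binary viewer below writes the standard PETSc binary matrix format: a 4-entry
   header (MAT_FILE_CLASSID, M, N, total nz), then per-row lengths, then column
   indices, then the numerical values, with rank 0 collecting the pieces from the
   other ranks under flow control.
*/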
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_Binary"
static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *A = (Mat_SeqSBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }

  ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);

  /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
     information needed to make it for each row from a block row. This does require more communication but still not more than
     the communication needed for the nonzero values */
  nzmax = nz; /* space the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
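/*
   The MatMult variants below compute y = A*x while storing only the upper triangle:
   the local diagonal block is applied directly, the (Hermitian) transpose of the
   off-diagonal part B supplies the contribution of the missing lower triangle, and a
   single sMvctx scatter with ADD_VALUES combines the partial products across ranks.
*/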
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
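/*
   MatMult_MPISBAIJ below is identical to the Hermitian variant above except that the
   lower-triangle contribution uses multtranspose rather than multhermitiantranspose,
   matching plain symmetric (non-Hermitian) storage.
*/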
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from,zero=0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
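/*
   The _2comm variants use the ordinary Mvctx scatter twice (a forward scatter for the
   superdiagonal contribution and a reverse ADD_VALUES scatter for the subdiagonal one)
   instead of the single combined sMvctx scatter used above, hence the name.
*/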
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}
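/*
   A hedged usage sketch for the row routines above (row is illustrative): for SBAIJ
   matrices MatGetRow() must be bracketed by the upper-triangular guards, and only
   locally owned rows may be queried.

     const PetscInt    *cols;
     const PetscScalar *vals;
     PetscInt          ncols;
     ierr = MatGetRowUpperTriangular(mat);CHKERRQ(ierr);
     ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     ierr = MatRestoreRowUpperTriangular(mat);CHKERRQ(ierr);
*/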
MatRealPart(a->A);CHKERRQ(ierr); 1491 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1492 PetscFunctionReturn(0); 1493 } 1494 1495 #undef __FUNCT__ 1496 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1497 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1498 { 1499 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1500 PetscErrorCode ierr; 1501 1502 PetscFunctionBegin; 1503 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1504 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1505 PetscFunctionReturn(0); 1506 } 1507 1508 /* Check if isrow is a subset of iscol_local, called by MatGetSubMatrix_MPISBAIJ() 1509 Input: isrow - distributed(parallel), 1510 iscol_local - locally owned (seq) 1511 */ 1512 #undef __FUNCT__ 1513 #define __FUNCT__ "ISEqual_private" 1514 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg) 1515 { 1516 PetscErrorCode ierr; 1517 PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch; 1518 const PetscInt *ptr1,*ptr2; 1519 1520 PetscFunctionBegin; 1521 ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr); 1522 ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr); 1523 if (sz1 > sz2) { 1524 *flg = PETSC_FALSE; 1525 PetscFunctionReturn(0); 1526 } 1527 1528 ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr); 1529 ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1530 1531 ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr); 1532 ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr); 1533 ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr); 1534 ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr); 1535 ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr); 1536 ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr); 1537 1538 nmatch=0; 1539 k = 0; 1540 for (i=0; i<sz1; i++){ 1541 for (j=k; j<sz2; j++){ 1542 if (a1[i] == a2[j]) { 1543 k = j; nmatch++; 1544 break; 1545 } 1546 } 1547 } 1548 ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr); 1549 ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1550 ierr = PetscFree(a1);CHKERRQ(ierr); 1551 ierr = PetscFree(a2);CHKERRQ(ierr); 1552 if (nmatch < sz1) { 1553 *flg = PETSC_FALSE; 1554 } else { 1555 *flg = PETSC_TRUE; 1556 } 1557 PetscFunctionReturn(0); 1558 } 1559 1560 #undef __FUNCT__ 1561 #define __FUNCT__ "MatGetSubMatrix_MPISBAIJ" 1562 PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1563 { 1564 PetscErrorCode ierr; 1565 IS iscol_local; 1566 PetscInt csize; 1567 PetscBool isequal; 1568 1569 PetscFunctionBegin; 1570 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1571 if (call == MAT_REUSE_MATRIX) { 1572 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1573 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1574 } else { 1575 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1576 ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr); 1577 if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow"); 1578 } 1579 1580 /* now call MatGetSubMatrix_MPIBAIJ() */ 1581 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1582 if (call == MAT_INITIAL_MATRIX) { 1583 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1584 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1585 } 1586 PetscFunctionReturn(0); 1587 } 1588 1589 #undef __FUNCT__ 1590 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1591 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1592 { 1593 Mat_MPISBAIJ *l = 
(Mat_MPISBAIJ*)A->data; 1594 PetscErrorCode ierr; 1595 1596 PetscFunctionBegin; 1597 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1598 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1599 PetscFunctionReturn(0); 1600 } 1601 1602 #undef __FUNCT__ 1603 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1604 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1605 { 1606 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1607 Mat A = a->A,B = a->B; 1608 PetscErrorCode ierr; 1609 PetscReal isend[5],irecv[5]; 1610 1611 PetscFunctionBegin; 1612 info->block_size = (PetscReal)matin->rmap->bs; 1613 1614 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1615 1616 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1617 isend[3] = info->memory; isend[4] = info->mallocs; 1618 1619 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1620 1621 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1622 isend[3] += info->memory; isend[4] += info->mallocs; 1623 if (flag == MAT_LOCAL) { 1624 info->nz_used = isend[0]; 1625 info->nz_allocated = isend[1]; 1626 info->nz_unneeded = isend[2]; 1627 info->memory = isend[3]; 1628 info->mallocs = isend[4]; 1629 } else if (flag == MAT_GLOBAL_MAX) { 1630 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1631 1632 info->nz_used = irecv[0]; 1633 info->nz_allocated = irecv[1]; 1634 info->nz_unneeded = irecv[2]; 1635 info->memory = irecv[3]; 1636 info->mallocs = irecv[4]; 1637 } else if (flag == MAT_GLOBAL_SUM) { 1638 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1639 1640 info->nz_used = irecv[0]; 1641 info->nz_allocated = irecv[1]; 1642 info->nz_unneeded = irecv[2]; 1643 info->memory = irecv[3]; 1644 info->mallocs = irecv[4]; 1645 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1646 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1647 info->fill_ratio_needed = 0; 1648 info->factor_mallocs = 0; 1649 PetscFunctionReturn(0); 1650 } 1651 1652 #undef __FUNCT__ 1653 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1654 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1655 { 1656 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1657 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1658 PetscErrorCode ierr; 1659 1660 PetscFunctionBegin; 1661 switch (op) { 1662 case MAT_NEW_NONZERO_LOCATIONS: 1663 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1664 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1665 case MAT_KEEP_NONZERO_PATTERN: 1666 case MAT_NEW_NONZERO_LOCATION_ERR: 1667 MatCheckPreallocated(A,1); 1668 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1669 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1670 break; 1671 case MAT_ROW_ORIENTED: 1672 MatCheckPreallocated(A,1); 1673 a->roworiented = flg; 1674 1675 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1676 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1677 break; 1678 case MAT_NEW_DIAGONALS: 1679 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1680 break; 1681 case MAT_IGNORE_OFF_PROC_ENTRIES: 1682 a->donotstash = flg; 1683 break; 1684 case MAT_USE_HASH_TABLE: 1685 a->ht_flag = flg; 1686 break; 1687 case MAT_HERMITIAN: 1688 MatCheckPreallocated(A,1); 1689 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1690 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1691 1692 A->ops->mult = 
MatMult_MPISBAIJ_Hermitian; 1693 break; 1694 case MAT_SPD: 1695 A->spd_set = PETSC_TRUE; 1696 A->spd = flg; 1697 if (flg) { 1698 A->symmetric = PETSC_TRUE; 1699 A->structurally_symmetric = PETSC_TRUE; 1700 A->symmetric_set = PETSC_TRUE; 1701 A->structurally_symmetric_set = PETSC_TRUE; 1702 } 1703 break; 1704 case MAT_SYMMETRIC: 1705 MatCheckPreallocated(A,1); 1706 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1707 break; 1708 case MAT_STRUCTURALLY_SYMMETRIC: 1709 MatCheckPreallocated(A,1); 1710 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1711 break; 1712 case MAT_SYMMETRY_ETERNAL: 1713 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1714 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1715 break; 1716 case MAT_IGNORE_LOWER_TRIANGULAR: 1717 aA->ignore_ltriangular = flg; 1718 break; 1719 case MAT_ERROR_LOWER_TRIANGULAR: 1720 aA->ignore_ltriangular = flg; 1721 break; 1722 case MAT_GETROW_UPPERTRIANGULAR: 1723 aA->getrow_utriangular = flg; 1724 break; 1725 default: 1726 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1727 } 1728 PetscFunctionReturn(0); 1729 } 1730 1731 #undef __FUNCT__ 1732 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1733 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1734 { 1735 PetscErrorCode ierr; 1736 1737 PetscFunctionBegin; 1738 if (reuse == MAT_INITIAL_MATRIX || *B != A) { /* the matrix is symmetric, so the transpose is a copy of the matrix itself */ 1739 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1740 } 1741 PetscFunctionReturn(0); 1742 } 1743 1744 #undef __FUNCT__ 1745 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1746 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1747 { 1748 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1749 Mat a = baij->A, b=baij->B; 1750 PetscErrorCode ierr; 1751 PetscInt nv,m,n; 1752 PetscBool flg; 1753 1754 PetscFunctionBegin; 1755 if (ll != rr) { 1756 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1757 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same"); 1758 } 1759 if (!ll) PetscFunctionReturn(0); 1760 1761 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1762 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1763 1764 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1765 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1766 1767 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1768 1769 /* left diagonalscale the off-diagonal part */ 1770 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1771 1772 /* scale the diagonal part */ 1773 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1774 1775 /* right diagonalscale the off-diagonal part */ 1776 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1777 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1778 PetscFunctionReturn(0); 1779 } 1780 1781 #undef __FUNCT__ 1782 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1783 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1784 { 1785 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1786 PetscErrorCode ierr; 1787 1788 PetscFunctionBegin; 1789 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1790 PetscFunctionReturn(0); 1791 } 1792 1793 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1794 1795 #undef __FUNCT__ 1796 #define __FUNCT__ "MatEqual_MPISBAIJ"
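/* compare the local diagonal (A) and off-diagonal (B) blocks, then combine the local results with a logical-AND reduction so every process returns the same answer */ 1797 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat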
B,PetscBool *flag) 1798 { 1799 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1800 Mat a,b,c,d; 1801 PetscBool flg; 1802 PetscErrorCode ierr; 1803 1804 PetscFunctionBegin; 1805 a = matA->A; b = matA->B; 1806 c = matB->A; d = matB->B; 1807 1808 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1809 if (flg) { 1810 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1811 } 1812 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1813 PetscFunctionReturn(0); 1814 } 1815 1816 #undef __FUNCT__ 1817 #define __FUNCT__ "MatCopy_MPISBAIJ" 1818 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1819 { 1820 PetscErrorCode ierr; 1821 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1822 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1823 1824 PetscFunctionBegin; 1825 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1826 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1827 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1828 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1829 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1830 } else { 1831 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1832 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1833 } 1834 PetscFunctionReturn(0); 1835 } 1836 1837 #undef __FUNCT__ 1838 #define __FUNCT__ "MatSetUp_MPISBAIJ" 1839 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1840 { 1841 PetscErrorCode ierr; 1842 1843 PetscFunctionBegin; 1844 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1845 PetscFunctionReturn(0); 1846 } 1847 1848 #undef __FUNCT__ 1849 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1850 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1851 { 1852 PetscErrorCode ierr; 1853 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1854 PetscBLASInt bnz,one=1; 1855 Mat_SeqSBAIJ *xa,*ya; 1856 Mat_SeqBAIJ *xb,*yb; 1857 1858 PetscFunctionBegin; 1859 if (str == SAME_NONZERO_PATTERN) { 1860 PetscScalar alpha = a; 1861 xa = (Mat_SeqSBAIJ*)xx->A->data; 1862 ya = (Mat_SeqSBAIJ*)yy->A->data; 1863 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1864 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1865 xb = (Mat_SeqBAIJ*)xx->B->data; 1866 yb = (Mat_SeqBAIJ*)yy->B->data; 1867 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1868 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1869 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1870 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1871 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1872 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1873 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 1874 } else { 1875 Mat B; 1876 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1877 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1878 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1879 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 1880 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1881 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1882 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1883 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1884 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1885 ierr = 
MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1886 ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 1887 ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1888 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1889 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1890 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1891 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 1892 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1893 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1894 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1895 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1896 } 1897 PetscFunctionReturn(0); 1898 } 1899 1900 #undef __FUNCT__ 1901 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1902 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1903 { 1904 PetscErrorCode ierr; 1905 PetscInt i; 1906 PetscBool flg; 1907 1908 PetscFunctionBegin; 1909 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */ 1910 for (i=0; i<n; i++) { 1911 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1912 if (!flg) { 1913 ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr); 1914 } 1915 } 1916 PetscFunctionReturn(0); 1917 } 1918 1919 #undef __FUNCT__ 1920 #define __FUNCT__ "MatShift_MPISBAIJ" 1921 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a) 1922 { 1923 PetscErrorCode ierr; 1924 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data; 1925 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data; 1926 1927 PetscFunctionBegin; 1928 if (!Y->preallocated) { 1929 ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 1930 } else if (!aij->nz) { 1931 PetscInt nonew = aij->nonew; 1932 ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 1933 aij->nonew = nonew; 1934 } 1935 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1936 PetscFunctionReturn(0); 1937 } 1938 1939 #undef __FUNCT__ 1940 #define __FUNCT__ "MatMissingDiagonal_MPISBAIJ" 1941 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1942 { 1943 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1944 PetscErrorCode ierr; 1945 1946 PetscFunctionBegin; 1947 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 1948 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 1949 if (d) { 1950 PetscInt rstart; 1951 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 1952 *d += rstart/A->rmap->bs; 1953 1954 } 1955 PetscFunctionReturn(0); 1956 } 1957 1958 1959 /* -------------------------------------------------------------------*/ 1960 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1961 MatGetRow_MPISBAIJ, 1962 MatRestoreRow_MPISBAIJ, 1963 MatMult_MPISBAIJ, 1964 /* 4*/ MatMultAdd_MPISBAIJ, 1965 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1966 MatMultAdd_MPISBAIJ, 1967 0, 1968 0, 1969 0, 1970 /* 10*/ 0, 1971 0, 1972 0, 1973 MatSOR_MPISBAIJ, 1974 MatTranspose_MPISBAIJ, 1975 /* 15*/ MatGetInfo_MPISBAIJ, 1976 MatEqual_MPISBAIJ, 1977 MatGetDiagonal_MPISBAIJ, 1978 MatDiagonalScale_MPISBAIJ, 1979 MatNorm_MPISBAIJ, 1980 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1981 MatAssemblyEnd_MPISBAIJ, 1982 MatSetOption_MPISBAIJ, 1983 MatZeroEntries_MPISBAIJ, 1984 /* 24*/ 0, 1985 0, 1986 0, 1987 0, 1988 0, 1989 /* 29*/ MatSetUp_MPISBAIJ, 1990 0, 1991 0, 1992 0, 1993 0, 1994 /* 34*/ MatDuplicate_MPISBAIJ, 1995 0, 1996 0, 1997 0, 
1998 0, 1999 /* 39*/ MatAXPY_MPISBAIJ, 2000 MatGetSubMatrices_MPISBAIJ, 2001 MatIncreaseOverlap_MPISBAIJ, 2002 MatGetValues_MPISBAIJ, 2003 MatCopy_MPISBAIJ, 2004 /* 44*/ 0, 2005 MatScale_MPISBAIJ, 2006 MatShift_MPISBAIJ, 2007 0, 2008 0, 2009 /* 49*/ 0, 2010 0, 2011 0, 2012 0, 2013 0, 2014 /* 54*/ 0, 2015 0, 2016 MatSetUnfactored_MPISBAIJ, 2017 0, 2018 MatSetValuesBlocked_MPISBAIJ, 2019 /* 59*/ MatGetSubMatrix_MPISBAIJ, 2020 0, 2021 0, 2022 0, 2023 0, 2024 /* 64*/ 0, 2025 0, 2026 0, 2027 0, 2028 0, 2029 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 2030 0, 2031 0, 2032 0, 2033 0, 2034 /* 74*/ 0, 2035 0, 2036 0, 2037 0, 2038 0, 2039 /* 79*/ 0, 2040 0, 2041 0, 2042 0, 2043 MatLoad_MPISBAIJ, 2044 /* 84*/ 0, 2045 0, 2046 0, 2047 0, 2048 0, 2049 /* 89*/ 0, 2050 0, 2051 0, 2052 0, 2053 0, 2054 /* 94*/ 0, 2055 0, 2056 0, 2057 0, 2058 0, 2059 /* 99*/ 0, 2060 0, 2061 0, 2062 0, 2063 0, 2064 /*104*/ 0, 2065 MatRealPart_MPISBAIJ, 2066 MatImaginaryPart_MPISBAIJ, 2067 MatGetRowUpperTriangular_MPISBAIJ, 2068 MatRestoreRowUpperTriangular_MPISBAIJ, 2069 /*109*/ 0, 2070 0, 2071 0, 2072 0, 2073 MatMissingDiagonal_MPISBAIJ, 2074 /*114*/ 0, 2075 0, 2076 0, 2077 0, 2078 0, 2079 /*119*/ 0, 2080 0, 2081 0, 2082 0, 2083 0, 2084 /*124*/ 0, 2085 0, 2086 0, 2087 0, 2088 0, 2089 /*129*/ 0, 2090 0, 2091 0, 2092 0, 2093 0, 2094 /*134*/ 0, 2095 0, 2096 0, 2097 0, 2098 0, 2099 /*139*/ 0, 2100 0, 2101 0, 2102 0, 2103 0, 2104 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 2105 }; 2106 2107 #undef __FUNCT__ 2108 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 2109 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 2110 { 2111 PetscFunctionBegin; 2112 *a = ((Mat_MPISBAIJ*)A->data)->A; 2113 PetscFunctionReturn(0); 2114 } 2115 2116 #undef __FUNCT__ 2117 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 2118 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2119 { 2120 Mat_MPISBAIJ *b; 2121 PetscErrorCode ierr; 2122 PetscInt i,mbs,Mbs; 2123 2124 PetscFunctionBegin; 2125 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2126 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2127 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2128 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2129 2130 b = (Mat_MPISBAIJ*)B->data; 2131 mbs = B->rmap->n/bs; 2132 Mbs = B->rmap->N/bs; 2133 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->n,bs); 2134 2135 B->rmap->bs = bs; 2136 b->bs2 = bs*bs; 2137 b->mbs = mbs; 2138 b->Mbs = Mbs; 2139 b->nbs = B->cmap->n/bs; 2140 b->Nbs = B->cmap->N/bs; 2141 2142 for (i=0; i<=b->size; i++) { 2143 b->rangebs[i] = B->rmap->range[i]/bs; 2144 } 2145 b->rstartbs = B->rmap->rstart/bs; 2146 b->rendbs = B->rmap->rend/bs; 2147 2148 b->cstartbs = B->cmap->rstart/bs; 2149 b->cendbs = B->cmap->rend/bs; 2150 2151 if (!B->preallocated) { 2152 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2153 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2154 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 2155 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2156 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2157 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2158 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2159 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
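/* the block stash buffers blocks destined for other processes (set with MatSetValuesBlocked()) until they are communicated during MatAssemblyBegin()/MatAssemblyEnd() */ 2160 ierr =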
MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2161 } 2162 2163 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2164 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2165 2166 B->preallocated = PETSC_TRUE; 2167 PetscFunctionReturn(0); 2168 } 2169 2170 #undef __FUNCT__ 2171 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ" 2172 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2173 { 2174 PetscInt m,rstart,cstart,cend; 2175 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2176 const PetscInt *JJ =0; 2177 PetscScalar *values=0; 2178 PetscErrorCode ierr; 2179 2180 PetscFunctionBegin; 2181 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2182 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2183 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2184 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2185 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2186 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2187 m = B->rmap->n/bs; 2188 rstart = B->rmap->rstart/bs; 2189 cstart = B->cmap->rstart/bs; 2190 cend = B->cmap->rend/bs; 2191 2192 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2193 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2194 for (i=0; i<m; i++) { 2195 nz = ii[i+1] - ii[i]; 2196 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2197 nz_max = PetscMax(nz_max,nz); 2198 JJ = jj + ii[i]; 2199 for (j=0; j<nz; j++) { 2200 if (*JJ >= cstart) break; 2201 JJ++; 2202 } 2203 d = 0; 2204 for (; j<nz; j++) { 2205 if (*JJ++ >= cend) break; 2206 d++; 2207 } 2208 d_nnz[i] = d; 2209 o_nnz[i] = nz - d; 2210 } 2211 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2212 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2213 2214 values = (PetscScalar*)V; 2215 if (!values) { 2216 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2217 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2218 } 2219 for (i=0; i<m; i++) { 2220 PetscInt row = i + rstart; 2221 PetscInt ncols = ii[i+1] - ii[i]; 2222 const PetscInt *icols = jj + ii[i]; 2223 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2224 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2225 } 2226 2227 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2228 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2229 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2230 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2231 PetscFunctionReturn(0); 2232 } 2233 2234 /*MC 2235 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2236 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2237 the matrix is stored. 2238 2239 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2240 can call MatSetOption(Mat, MAT_HERMITIAN); 2241 2242 Options Database Keys: 2243 . 
-mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 2244 2245 Level: beginner 2246 2247 .seealso: MatCreateMPISBAIJ 2248 M*/ 2249 2250 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*); 2251 2252 #undef __FUNCT__ 2253 #define __FUNCT__ "MatCreate_MPISBAIJ" 2254 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2255 { 2256 Mat_MPISBAIJ *b; 2257 PetscErrorCode ierr; 2258 PetscBool flg = PETSC_FALSE; 2259 2260 PetscFunctionBegin; 2261 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2262 B->data = (void*)b; 2263 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2264 2265 B->ops->destroy = MatDestroy_MPISBAIJ; 2266 B->ops->view = MatView_MPISBAIJ; 2267 B->assembled = PETSC_FALSE; 2268 B->insertmode = NOT_SET_VALUES; 2269 2270 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2271 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2272 2273 /* build local table of row and column ownerships */ 2274 ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 2275 2276 /* build cache for off array entries formed */ 2277 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2278 2279 b->donotstash = PETSC_FALSE; 2280 b->colmap = NULL; 2281 b->garray = NULL; 2282 b->roworiented = PETSC_TRUE; 2283 2284 /* stuff used in block assembly */ 2285 b->barray = 0; 2286 2287 /* stuff used for matrix vector multiply */ 2288 b->lvec = 0; 2289 b->Mvctx = 0; 2290 b->slvec0 = 0; 2291 b->slvec0b = 0; 2292 b->slvec1 = 0; 2293 b->slvec1a = 0; 2294 b->slvec1b = 0; 2295 b->sMvctx = 0; 2296 2297 /* stuff for MatGetRow() */ 2298 b->rowindices = 0; 2299 b->rowvalues = 0; 2300 b->getrowactive = PETSC_FALSE; 2301 2302 /* hash table stuff */ 2303 b->ht = 0; 2304 b->hd = 0; 2305 b->ht_size = 0; 2306 b->ht_flag = PETSC_FALSE; 2307 b->ht_fact = 0; 2308 b->ht_total_ct = 0; 2309 b->ht_insert_ct = 0; 2310 2311 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 2312 b->ijonly = PETSC_FALSE; 2313 2314 b->in_loc = 0; 2315 b->v_loc = 0; 2316 b->n_loc = 0; 2317 2318 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 2319 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 2320 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 2321 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 2322 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 2323 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr); 2324 #if defined(PETSC_HAVE_ELEMENTAL) 2325 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 2326 #endif 2327 2328 B->symmetric = PETSC_TRUE; 2329 B->structurally_symmetric = PETSC_TRUE; 2330 B->symmetric_set = PETSC_TRUE; 2331 B->structurally_symmetric_set = PETSC_TRUE; 2332 2333 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 2334 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 2335 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save 
memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 2336 if (flg) { 2337 PetscReal fact = 1.39; 2338 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2339 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 2340 if (fact <= 1.0) fact = 1.39; 2341 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2342 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2343 } 2344 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2345 PetscFunctionReturn(0); 2346 } 2347 2348 /*MC 2349 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2350 2351 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 2352 and MATMPISBAIJ otherwise. 2353 2354 Options Database Keys: 2355 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2356 2357 Level: beginner 2358 2359 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 2360 M*/ 2361 2362 #undef __FUNCT__ 2363 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 2364 /*@C 2365 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2366 the user should preallocate the matrix storage by setting the parameters 2367 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2368 performance can be increased by more than a factor of 50. 2369 2370 Collective on Mat 2371 2372 Input Parameters: 2373 + B - the matrix 2374 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2375 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2376 . d_nz - number of block nonzeros per block row in diagonal portion of local 2377 submatrix (same for all local rows) 2378 . d_nnz - array containing the number of block nonzeros in the various block rows 2379 in the upper triangular and diagonal part of the diagonal portion of the local submatrix 2380 (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 2381 for the diagonal entry and set a value even if it is zero. 2382 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2383 submatrix (same for all local rows). 2384 - o_nnz - array containing the number of nonzeros in the various block rows of the 2385 off-diagonal portion of the local submatrix that is right of the diagonal 2386 (possibly different for each block row) or NULL. 2387 2388 2389 Options Database Keys: 2390 . -mat_no_unroll - uses code that does not unroll the loops in the 2391 block calculations (much slower) 2392 . -mat_block_size - size of the blocks to use 2393 2394 Notes: 2395 2396 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2397 then it must be used on all processors that share the object for that argument. 2398 2399 If the *_nnz parameter is given then the *_nz parameter is ignored. 2400 2401 Storage Information: 2402 For a square global matrix we define each processor's diagonal portion 2403 to be its local rows and the corresponding columns (a square submatrix); 2404 each processor's off-diagonal portion encompasses the remainder of the 2405 local matrix (a rectangular submatrix).
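For example, a minimal call sequence (the global size M and the per-block-row nonzero estimates here are hypothetical) that preallocates such a matrix with bs=2, at most 5 block nonzeros per block row in the upper triangle of the diagonal portion, and at most 2 in the off-diagonal portion is
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);
   MatSetType(A,MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A,2,5,NULL,2,NULL);
.ve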
2406 2407 The user can specify preallocated storage for the diagonal part of 2408 the local submatrix with either d_nz or d_nnz (not both). Set 2409 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2410 memory allocation. Likewise, specify preallocated storage for the 2411 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2412 2413 You can call MatGetInfo() to get information on how effective the preallocation was; 2414 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2415 You can also run with the option -info and look for messages with the string 2416 malloc in them to see if additional memory allocation was needed. 2417 2418 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2419 the figure below we depict these three local rows and all columns (0-11). 2420 2421 .vb 2422 0 1 2 3 4 5 6 7 8 9 10 11 2423 -------------------------- 2424 row 3 |. . . d d d o o o o o o 2425 row 4 |. . . d d d o o o o o o 2426 row 5 |. . . d d d o o o o o o 2427 -------------------------- 2428 .ve 2429 2430 Thus, any entries in the d locations are stored in the d (diagonal) 2431 submatrix, and any entries in the o locations are stored in the 2432 o (off-diagonal) submatrix. Note that the d matrix is stored in 2433 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2434 2435 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2436 plus the diagonal part of the d matrix, 2437 and o_nz should indicate the number of block nonzeros per row in the o matrix 2438 2439 In general, for PDE problems in which most nonzeros are near the diagonal, 2440 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2441 or you will get TERRIBLE performance; see the users' manual chapter on 2442 matrices. 2443 2444 Level: intermediate 2445 2446 .keywords: matrix, block, aij, compressed row, sparse, parallel 2447 2448 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 2449 @*/ 2450 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2451 { 2452 PetscErrorCode ierr; 2453 2454 PetscFunctionBegin; 2455 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2456 PetscValidType(B,1); 2457 PetscValidLogicalCollectiveInt(B,bs,2); 2458 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 2459 PetscFunctionReturn(0); 2460 } 2461 2462 #undef __FUNCT__ 2463 #define __FUNCT__ "MatCreateSBAIJ" 2464 /*@C 2465 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2466 (block compressed row). For good matrix assembly performance 2467 the user should preallocate the matrix storage by setting the parameters 2468 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2469 performance can be increased by more than a factor of 50. 2470 2471 Collective on MPI_Comm 2472 2473 Input Parameters: 2474 + comm - MPI communicator 2475 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2476 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2477 . 
m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2478 This value should be the same as the local size used in creating the 2479 y vector for the matrix-vector product y = Ax. 2480 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2481 This value should be the same as the local size used in creating the 2482 x vector for the matrix-vector product y = Ax. 2483 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2484 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2485 . d_nz - number of block nonzeros per block row in diagonal portion of local 2486 submatrix (same for all local rows) 2487 . d_nnz - array containing the number of block nonzeros in the various block rows 2488 in the upper triangular portion of the diagonal portion of the local submatrix 2489 (possibly different for each block row) or NULL. 2490 If you plan to factor the matrix you must leave room for the diagonal entry and 2491 set its value even if it is zero. 2492 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2493 submatrix (same for all local rows). 2494 - o_nnz - array containing the number of nonzeros in the various block rows of the 2495 off-diagonal portion of the local submatrix (possibly different for 2496 each block row) or NULL. 2497 2498 Output Parameter: 2499 . A - the matrix 2500 2501 Options Database Keys: 2502 . -mat_no_unroll - uses code that does not unroll the loops in the 2503 block calculations (much slower) 2504 . -mat_block_size - size of the blocks to use 2505 . -mat_mpi - use the parallel matrix data structures even on one processor 2506 (defaults to using SeqSBAIJ format on one processor) 2507 2508 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2509 MatXXXXSetPreallocation() paradigm instead of this routine directly. 2510 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2511 2512 Notes: 2513 The number of rows and columns must be divisible by blocksize. 2514 This matrix type does not support complex Hermitian operations. 2515 2516 The user MUST specify either the local or global matrix dimensions 2517 (possibly both). 2518 2519 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2520 then it must be used on all processors that share the object for that argument. 2521 2522 If the *_nnz parameter is given then the *_nz parameter is ignored. 2523 2524 Storage Information: 2525 For a square global matrix we define each processor's diagonal portion 2526 to be its local rows and the corresponding columns (a square submatrix); 2527 each processor's off-diagonal portion encompasses the remainder of the 2528 local matrix (a rectangular submatrix). 2529 2530 The user can specify preallocated storage for the diagonal part of 2531 the local submatrix with either d_nz or d_nnz (not both). Set 2532 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2533 memory allocation. Likewise, specify preallocated storage for the 2534 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2535 2536 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2537 the figure below we depict these three local rows and all columns (0-11). 2538 2539 .vb 2540 0 1 2 3 4 5 6 7 8 9 10 11 2541 -------------------------- 2542 row 3 |. . . d d d o o o o o o 2543 row 4 |. . . d d d o o o o o o 2544 row 5 |. . . 
d d d o o o o o o 2545 -------------------------- 2546 .ve 2547 2548 Thus, any entries in the d locations are stored in the d (diagonal) 2549 submatrix, and any entries in the o locations are stored in the 2550 o (off-diagonal) submatrix. Note that the d matrix is stored in 2551 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2552 2553 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2554 plus the diagonal part of the d matrix, 2555 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2556 In general, for PDE problems in which most nonzeros are near the diagonal, 2557 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2558 or you will get TERRIBLE performance; see the users' manual chapter on 2559 matrices. 2560 2561 Level: intermediate 2562 2563 .keywords: matrix, block, aij, compressed row, sparse, parallel 2564 2565 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2566 @*/ 2567 2568 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2569 { 2570 PetscErrorCode ierr; 2571 PetscMPIInt size; 2572 2573 PetscFunctionBegin; 2574 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2575 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2576 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2577 if (size > 1) { 2578 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2579 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2580 } else { 2581 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2582 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2583 } 2584 PetscFunctionReturn(0); 2585 } 2586 2587 2588 #undef __FUNCT__ 2589 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2590 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2591 { 2592 Mat mat; 2593 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2594 PetscErrorCode ierr; 2595 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2596 PetscScalar *array; 2597 2598 PetscFunctionBegin; 2599 *newmat = 0; 2600 2601 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2602 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2603 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2604 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2605 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2606 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2607 2608 mat->factortype = matin->factortype; 2609 mat->preallocated = PETSC_TRUE; 2610 mat->assembled = PETSC_TRUE; 2611 mat->insertmode = NOT_SET_VALUES; 2612 2613 a = (Mat_MPISBAIJ*)mat->data; 2614 a->bs2 = oldmat->bs2; 2615 a->mbs = oldmat->mbs; 2616 a->nbs = oldmat->nbs; 2617 a->Mbs = oldmat->Mbs; 2618 a->Nbs = oldmat->Nbs; 2619 2620 2621 a->size = oldmat->size; 2622 a->rank = oldmat->rank; 2623 a->donotstash = oldmat->donotstash; 2624 a->roworiented = oldmat->roworiented; 2625 a->rowindices = 0; 2626 a->rowvalues = 0; 2627 a->getrowactive = PETSC_FALSE; 2628 a->barray = 0; 2629 a->rstartbs = oldmat->rstartbs; 2630 a->rendbs = oldmat->rendbs; 2631 a->cstartbs = oldmat->cstartbs; 2632 a->cendbs = oldmat->cendbs; 2633 2634 /* hash table stuff */ 2635 a->ht = 0; 2636 a->hd = 0; 2637 a->ht_size = 0; 2638 a->ht_flag = oldmat->ht_flag; 2639 a->ht_fact 
= oldmat->ht_fact; 2640 a->ht_total_ct = 0; 2641 a->ht_insert_ct = 0; 2642 2643 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2644 if (oldmat->colmap) { 2645 #if defined(PETSC_USE_CTABLE) 2646 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2647 #else 2648 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 2649 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2650 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2651 #endif 2652 } else a->colmap = 0; 2653 2654 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2655 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2656 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2657 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2658 } else a->garray = 0; 2659 2660 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2661 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2662 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2663 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2664 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2665 2666 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2667 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2668 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2669 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2670 2671 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2672 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2673 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2674 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2675 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2676 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2677 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2678 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2679 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2680 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2681 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 2682 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 2683 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2684 2685 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2686 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2687 a->sMvctx = oldmat->sMvctx; 2688 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 2689 2690 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2691 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2692 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2693 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2694 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2695 *newmat = mat; 2696 PetscFunctionReturn(0); 2697 } 2698 2699 #undef __FUNCT__ 2700 #define 
__FUNCT__ "MatLoad_MPISBAIJ" 2701 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2702 { 2703 PetscErrorCode ierr; 2704 PetscInt i,nz,j,rstart,rend; 2705 PetscScalar *vals,*buf; 2706 MPI_Comm comm; 2707 MPI_Status status; 2708 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2709 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2710 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2711 PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows; 2712 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2713 PetscInt dcount,kmax,k,nzcount,tmp; 2714 int fd; 2715 2716 PetscFunctionBegin; 2717 /* force binary viewer to load .info file if it has not yet done so */ 2718 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2719 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2720 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2721 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2722 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2723 if (bs < 0) bs = 1; 2724 2725 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2726 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2727 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2728 if (!rank) { 2729 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2730 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2731 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2732 } 2733 2734 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2735 M = header[1]; 2736 N = header[2]; 2737 2738 /* If global sizes are set, check if they are consistent with that given in the file */ 2739 if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M); 2740 if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N); 2741 2742 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2743 2744 /* 2745 This code adds extra rows to make sure the number of rows is 2746 divisible by the blocksize 2747 */ 2748 Mbs = M/bs; 2749 extra_rows = bs - M + bs*(Mbs); 2750 if (extra_rows == bs) extra_rows = 0; 2751 else Mbs++; 2752 if (extra_rows &&!rank) { 2753 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2754 } 2755 2756 /* determine ownership of all rows */ 2757 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2758 mbs = Mbs/size + ((Mbs % size) > rank); 2759 m = mbs*bs; 2760 } else { /* User Set */ 2761 m = newmat->rmap->n; 2762 mbs = m/bs; 2763 } 2764 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 2765 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2766 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2767 rowners[0] = 0; 2768 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2769 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2770 rstart = rowners[rank]; 2771 rend = rowners[rank+1]; 2772 2773 /* distribute row lengths to all processors */ 2774 ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 
2775 if (!rank) { 2776 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 2777 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2778 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2779 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 2780 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2781 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2782 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2783 } else { 2784 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2785 } 2786 2787 if (!rank) { /* procs[0] */ 2788 /* calculate the number of nonzeros on each processor */ 2789 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 2790 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2791 for (i=0; i<size; i++) { 2792 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2793 procsnz[i] += rowlengths[j]; 2794 } 2795 } 2796 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2797 2798 /* determine max buffer needed and allocate it */ 2799 maxnz = 0; 2800 for (i=0; i<size; i++) { 2801 maxnz = PetscMax(maxnz,procsnz[i]); 2802 } 2803 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2804 2805 /* read in my part of the matrix column indices */ 2806 nz = procsnz[0]; 2807 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2808 mycols = ibuf; 2809 if (size == 1) nz -= extra_rows; 2810 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2811 if (size == 1) { 2812 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 2813 } 2814 2815 /* read in every ones (except the last) and ship off */ 2816 for (i=1; i<size-1; i++) { 2817 nz = procsnz[i]; 2818 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2819 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2820 } 2821 /* read in the stuff for the last proc */ 2822 if (size != 1) { 2823 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2824 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2825 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2826 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2827 } 2828 ierr = PetscFree(cols);CHKERRQ(ierr); 2829 } else { /* procs[i], i>0 */ 2830 /* determine buffer space needed for message */ 2831 nz = 0; 2832 for (i=0; i<m; i++) nz += locrowlens[i]; 2833 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2834 mycols = ibuf; 2835 /* receive message of column indices*/ 2836 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2837 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2838 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2839 } 2840 2841 /* loop over local rows, determining number of off diagonal entries */ 2842 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 2843 ierr = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 2844 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2845 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2846 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2847 rowcount = 0; 2848 nzcount = 0; 2849 for (i=0; i<mbs; i++) { 2850 dcount = 0; 2851 odcount = 0; 2852 for (j=0; j<bs; j++) { 2853 kmax = locrowlens[rowcount]; 2854 for (k=0; k<kmax; k++) { 2855 tmp = mycols[nzcount++]/bs; /* block col. 
index */ 2856 if (!mask[tmp]) { 2857 mask[tmp] = 1; 2858 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2859 else masked1[dcount++] = tmp; /* entry in diag portion */ 2860 } 2861 } 2862 rowcount++; 2863 } 2864 2865 dlens[i] = dcount; /* d_nzz[i] */ 2866 odlens[i] = odcount; /* o_nzz[i] */ 2867 2868 /* zero out the mask elements we set */ 2869 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2870 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2871 } 2872 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2873 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2874 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2875 2876 if (!rank) { 2877 ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 2878 /* read in my part of the matrix numerical values */ 2879 nz = procsnz[0]; 2880 vals = buf; 2881 mycols = ibuf; 2882 if (size == 1) nz -= extra_rows; 2883 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2884 if (size == 1) { 2885 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 2886 } 2887 2888 /* insert into matrix */ 2889 jj = rstart*bs; 2890 for (i=0; i<m; i++) { 2891 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2892 mycols += locrowlens[i]; 2893 vals += locrowlens[i]; 2894 jj++; 2895 } 2896 2897 /* read in other processors (except the last one) and ship out */ 2898 for (i=1; i<size-1; i++) { 2899 nz = procsnz[i]; 2900 vals = buf; 2901 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2902 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2903 } 2904 /* the last proc */ 2905 if (size != 1) { 2906 nz = procsnz[i] - extra_rows; 2907 vals = buf; 2908 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2909 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2910 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2911 } 2912 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2913 2914 } else { 2915 /* receive numeric values */ 2916 ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 2917 2918 /* receive message of values*/ 2919 vals = buf; 2920 mycols = ibuf; 2921 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2922 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2923 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2924 2925 /* insert into matrix */ 2926 jj = rstart*bs; 2927 for (i=0; i<m; i++) { 2928 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2929 mycols += locrowlens[i]; 2930 vals += locrowlens[i]; 2931 jj++; 2932 } 2933 } 2934 2935 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2936 ierr = PetscFree(buf);CHKERRQ(ierr); 2937 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2938 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2939 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2940 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2941 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2942 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2943 PetscFunctionReturn(0); 2944 } 2945 2946 #undef __FUNCT__ 2947 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2948 /*XXXXX@ 2949 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2950 2951 Input Parameters: 2952 . mat - the matrix 2953 . 
fact - factor 2954 2955 Not Collective on Mat, each process can have a different hash factor 2956 2957 Level: advanced 2958 2959 Notes: 2960 This can also be set by the command line option: -mat_use_hash_table fact 2961 2962 .keywords: matrix, hashtable, factor, HT 2963 2964 .seealso: MatSetOption() 2965 @XXXXX*/ 2966 2967 2968 #undef __FUNCT__ 2969 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2970 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2971 { 2972 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2973 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2974 PetscReal atmp; 2975 PetscReal *work,*svalues,*rvalues; 2976 PetscErrorCode ierr; 2977 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2978 PetscMPIInt rank,size; 2979 PetscInt *rowners_bs,dest,count,source; 2980 PetscScalar *va; 2981 MatScalar *ba; 2982 MPI_Status stat; 2983 2984 PetscFunctionBegin; 2985 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2986 ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 2987 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2988 2989 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2990 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2991 2992 bs = A->rmap->bs; 2993 mbs = a->mbs; 2994 Mbs = a->Mbs; 2995 ba = b->a; 2996 bi = b->i; 2997 bj = b->j; 2998 2999 /* find ownerships */ 3000 rowners_bs = A->rmap->range; 3001 3002 /* each proc creates an array to be distributed */ 3003 ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr); 3004 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 3005 3006 /* row_max for B */ 3007 if (rank != size-1) { 3008 for (i=0; i<mbs; i++) { 3009 ncols = bi[1] - bi[0]; bi++; 3010 brow = bs*i; 3011 for (j=0; j<ncols; j++) { 3012 bcol = bs*(*bj); 3013 for (kcol=0; kcol<bs; kcol++) { 3014 col = bcol + kcol; /* local col index */ 3015 col += rowners_bs[rank+1]; /* global col index */ 3016 for (krow=0; krow<bs; krow++) { 3017 atmp = PetscAbsScalar(*ba); ba++; 3018 row = brow + krow; /* local row index */ 3019 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 3020 if (work[col] < atmp) work[col] = atmp; 3021 } 3022 } 3023 bj++; 3024 } 3025 } 3026 3027 /* send values to its owners */ 3028 for (dest=rank+1; dest<size; dest++) { 3029 svalues = work + rowners_bs[dest]; 3030 count = rowners_bs[dest+1]-rowners_bs[dest]; 3031 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 3032 } 3033 } 3034 3035 /* receive values */ 3036 if (rank) { 3037 rvalues = work; 3038 count = rowners_bs[rank+1]-rowners_bs[rank]; 3039 for (source=0; source<rank; source++) { 3040 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 3041 /* process values */ 3042 for (i=0; i<count; i++) { 3043 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 3044 } 3045 } 3046 } 3047 3048 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 3049 ierr = PetscFree(work);CHKERRQ(ierr); 3050 PetscFunctionReturn(0); 3051 } 3052 3053 #undef __FUNCT__ 3054 #define __FUNCT__ "MatSOR_MPISBAIJ" 3055 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 3056 { 3057 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 3058 PetscErrorCode ierr; 3059 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 3060 PetscScalar *x,*ptr,*from; 3061 Vec bb1; 3062 const PetscScalar *b; 3063 3064 PetscFunctionBegin; 3065 if (its <= 0 || lits <= 0) 

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ"
PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
  PetscScalar       *x,*ptr,*from;
  Vec               bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {

      /* lower triangular part: slvec0b = - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);

      ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if (flag & SOR_EISENSTAT) {
    Vec               xx1;
    PetscBool         hasop;
    const PetscScalar *diag;
    PetscScalar       *sl,scale = (omega - 2.0)/omega;
    PetscInt          i,n;

    if (!mat->xx1) {
      ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
      ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    if (!mat->diag) {
      /* this is wrong for the same matrix with new nonzero values */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);

    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
    } else {
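      /*
         Worked form of the Eisenstat residual update computed below:
         with scale = (omega - 2.0)/omega,

             sl[i] = b[i] + scale*diag[i]*x[i],

         which for omega == 1.0 reduces to sl[i] = b[i] - diag[i]*x[i].
      */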
      /*
         These two lines are replaced by the code below, which may be a bit
         faster for a good compiler:
           ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
           ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
      */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
      if (omega == 1.0) {
        for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply off-diagonal portion of matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}
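
/*
   A minimal sketch of how the SSOR relaxation above is normally reached
   (illustrative, not part of this file's API: ksp/pc are a KSP and PC
   created by the caller, and A an assembled MPISBAIJ matrix with block
   size 1, as required by the checks above):

     ierr = KSPSetType(ksp,KSPRICHARDSON);CHKERRQ(ierr);
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);   // PCSOR applies MatSOR() on A
     ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
     ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);

   or equivalently from the options database: -ksp_type richardson -pc_type sor
*/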

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower triangular part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJWithArrays"
/*@
   MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - number of local columns; this value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it calculated if N is
       given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based; i[row] is the offset into the local j (and a) arrays at which
   the entries of that row begin.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
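
/*
   A worked example for MatCreateMPISBAIJWithArrays() (a sketch for a single
   process, assuming, as the symmetric storage suggests, that each local row
   lists only its diagonal and upper-triangular entries). For the symmetric
   matrix

        1 2 0
        2 3 4
        0 4 5

   the CSR arrays describing the upper triangle are:

     PetscInt    ia[] = {0,2,4,5};
     PetscInt    ja[] = {0,1,1,2,2};
     PetscScalar va[] = {1.0,2.0,3.0,4.0,5.0};
     Mat         A;

     ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,3,3,PETSC_DETERMINE,PETSC_DETERMINE,ia,ja,va,&A);CHKERRQ(ierr);
*/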

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
/*@C
   MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
@*/
PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[],const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
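
/*
   A usage sketch for MatMPISBAIJSetPreallocationCSR(); the sizes and arrays
   are illustrative placeholders. MatCreateMPISBAIJWithArrays() above is a
   thin wrapper around exactly this sequence:

     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocationCSR(B,bs,ia,ja,va);CHKERRQ(ierr);
*/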

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data;
    PetscInt     *dnz,*onz,sum,bs,cbs,mbs,Nbs;
    PetscInt     *bindx,rmax=a->rmax,j;

    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    mbs = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
    }
    /* Check sum(n) = Nbs */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns != global block columns %D",Nbs);

    ierr    = MPI_Scan(&mbs,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= mbs;

    ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
    ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<mbs; i++) {
      ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
      nnz  = nnz/bs;
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
    }
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscFree(bindx);CHKERRQ(ierr);

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);

  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
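
/*
   A usage sketch for the routine above (hedged: it is normally reached
   through the generic front end MatCreateMPIMatConcatenateSeqMat(), with
   each rank contributing its sequential SBAIJ piece):

     Mat seqA;  // sequential SBAIJ matrix owned by this rank
     Mat mpiA;
     ierr = MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpiA);CHKERRQ(ierr);
*/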