1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 9 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 10 { 11 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 12 PetscErrorCode ierr; 13 14 PetscFunctionBegin; 15 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 16 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 17 PetscFunctionReturn(0); 18 } 19 20 #undef __FUNCT__ 21 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 22 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 23 { 24 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 29 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 30 PetscFunctionReturn(0); 31 } 32 33 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 34 { \ 35 \ 36 brow = row/bs; \ 37 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 38 rmax = aimax[brow]; nrow = ailen[brow]; \ 39 bcol = col/bs; \ 40 ridx = row % bs; cidx = col % bs; \ 41 low = 0; high = nrow; \ 42 while (high-low > 3) { \ 43 t = (low+high)/2; \ 44 if (rp[t] > bcol) high = t; \ 45 else low = t; \ 46 } \ 47 for (_i=low; _i<high; _i++) { \ 48 if (rp[_i] > bcol) break; \ 49 if (rp[_i] == bcol) { \ 50 bap = ap + bs2*_i + bs*cidx + ridx; \ 51 if (addv == ADD_VALUES) *bap += value; \ 52 else *bap = value; \ 53 goto a_noinsert; \ 54 } \ 55 } \ 56 if (a->nonew == 1) goto a_noinsert; \ 57 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 58 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 59 N = nrow++ - 1; \ 60 /* shift up all the later entries in this row */ \ 61 for (ii=N; ii>=_i; ii--) { \ 62 rp[ii+1] = rp[ii]; \ 63 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 64 } \ 65 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 66 rp[_i] = bcol; \ 67 ap[bs2*_i + bs*cidx + ridx] = value; \ 68 A->nonzerostate++;\ 69 a_noinsert:; \ 70 ailen[brow] = nrow; \ 71 } 72 73 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 74 { \ 75 brow = row/bs; \ 76 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 77 rmax = bimax[brow]; nrow = bilen[brow]; \ 78 bcol = col/bs; \ 79 ridx = row % bs; cidx = col % bs; \ 80 low = 0; high = nrow; \ 81 while (high-low > 3) { \ 82 t = (low+high)/2; \ 83 if (rp[t] > bcol) high = t; \ 84 else low = t; \ 85 } \ 86 for (_i=low; _i<high; _i++) { \ 87 if (rp[_i] > bcol) break; \ 88 if (rp[_i] == bcol) { \ 89 bap = ap + bs2*_i + bs*cidx + ridx; \ 90 if (addv == ADD_VALUES) *bap += value; \ 91 else *bap = value; \ 92 goto b_noinsert; \ 93 } \ 94 } \ 95 if (b->nonew == 1) goto b_noinsert; \ 96 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 97 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 98 N = nrow++ - 1; \ 99 /* shift up all the later entries in this row */ \ 100 for (ii=N; ii>=_i; ii--) { \ 101 rp[ii+1] = rp[ii]; \ 102 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 103 } \ 104 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 105 rp[_i] = bcol; \ 106 ap[bs2*_i + bs*cidx + ridx] = value; \ 107 B->nonzerostate++;\ 108 b_noinsert:; \ 109 bilen[brow] = nrow; \ 110 } 111 112 /* Only add/insert a(i,j) with i<=j (blocks). 113 Any a(i,j) with i>j input by user is ingored. 114 */ 115 #undef __FUNCT__ 116 #define __FUNCT__ "MatSetValues_MPISBAIJ" 117 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 118 { 119 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 120 MatScalar value; 121 PetscBool roworiented = baij->roworiented; 122 PetscErrorCode ierr; 123 PetscInt i,j,row,col; 124 PetscInt rstart_orig=mat->rmap->rstart; 125 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 126 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 127 128 /* Some Variables required in the macro */ 129 Mat A = baij->A; 130 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 131 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 132 MatScalar *aa =a->a; 133 134 Mat B = baij->B; 135 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 136 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 137 MatScalar *ba =b->a; 138 139 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 140 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 141 MatScalar *ap,*bap; 142 143 /* for stash */ 144 PetscInt n_loc, *in_loc = NULL; 145 MatScalar *v_loc = NULL; 146 147 PetscFunctionBegin; 148 if (!baij->donotstash) { 149 if (n > baij->n_loc) { 150 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 151 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 152 ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr); 153 ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr); 154 155 baij->n_loc = n; 156 } 157 in_loc = baij->in_loc; 158 v_loc = baij->v_loc; 159 } 160 161 for (i=0; i<m; i++) { 162 if (im[i] < 0) continue; 163 #if defined(PETSC_USE_DEBUG) 164 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 165 #endif 166 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 167 row = im[i] - rstart_orig; /* local row index */ 168 for (j=0; j<n; j++) { 169 if (im[i]/bs > in[j]/bs) { 170 if (a->ignore_ltriangular) { 171 continue; /* ignore lower triangular blocks */ 172 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 173 } 174 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 175 col = in[j] - cstart_orig; /* local col index */ 176 brow = row/bs; bcol = col/bs; 177 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 178 if (roworiented) value = v[i*n+j]; 179 else value = v[i+j*m]; 180 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 181 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 182 } else if (in[j] < 0) continue; 183 #if defined(PETSC_USE_DEBUG) 184 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 185 #endif 186 else { /* off-diag entry (B) */ 187 if (mat->was_assembled) { 188 if (!baij->colmap) { 189 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 190 } 191 #if defined(PETSC_USE_CTABLE) 192 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 193 col = col - 1; 194 #else 195 col = baij->colmap[in[j]/bs] - 1; 196 #endif 197 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 198 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 199 col = in[j]; 200 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 201 B = baij->B; 202 b = (Mat_SeqBAIJ*)(B)->data; 203 bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 204 ba = b->a; 205 } else col += in[j]%bs; 206 } else col = in[j]; 207 if (roworiented) value = v[i*n+j]; 208 else value = v[i+j*m]; 209 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 210 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 211 } 212 } 213 } else { /* off processor entry */ 214 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 215 if (!baij->donotstash) { 216 mat->assembled = PETSC_FALSE; 217 n_loc = 0; 218 for (j=0; j<n; j++) { 219 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 220 in_loc[n_loc] = in[j]; 221 if (roworiented) { 222 v_loc[n_loc] = v[i*n+j]; 223 } else { 224 v_loc[n_loc] = v[j*m+i]; 225 } 226 n_loc++; 227 } 228 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 229 } 230 } 231 } 232 PetscFunctionReturn(0); 233 } 234 235 #undef __FUNCT__ 236 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 237 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 238 { 239 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 240 const MatScalar *value; 241 MatScalar *barray =baij->barray; 242 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 243 PetscErrorCode ierr; 244 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 245 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 246 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 247 248 PetscFunctionBegin; 249 if (!barray) { 250 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 251 baij->barray = barray; 252 } 253 254 if (roworiented) { 255 stepval = (n-1)*bs; 256 } else { 257 stepval = (m-1)*bs; 258 } 259 for (i=0; i<m; i++) { 260 if (im[i] < 0) continue; 261 #if defined(PETSC_USE_DEBUG) 262 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 263 #endif 264 if (im[i] >= rstart && im[i] < rend) { 265 row = im[i] - rstart; 266 for (j=0; j<n; j++) { 267 if (im[i] > in[j]) { 268 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 269 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 270 } 271 /* If NumCol = 1 then a copy is not required */ 272 if ((roworiented) && (n == 1)) { 273 barray = (MatScalar*) v + i*bs2; 274 } else if ((!roworiented) && (m == 1)) { 275 barray = (MatScalar*) v + j*bs2; 276 } else { /* Here a copy is required */ 277 if (roworiented) { 278 value = v + i*(stepval+bs)*bs + j*bs; 279 } else { 280 value = v + j*(stepval+bs)*bs + i*bs; 281 } 282 for (ii=0; ii<bs; ii++,value+=stepval) { 283 for (jj=0; jj<bs; jj++) { 284 *barray++ = *value++; 285 } 286 } 287 barray -=bs2; 288 } 289 290 if (in[j] >= cstart && in[j] < cend) { 291 col = in[j] - cstart; 292 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 293 } else if (in[j] < 0) continue; 294 #if defined(PETSC_USE_DEBUG) 295 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 296 #endif 297 else { 298 if (mat->was_assembled) { 299 if (!baij->colmap) { 300 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 301 } 302 303 #if defined(PETSC_USE_DEBUG) 304 #if defined(PETSC_USE_CTABLE) 305 { PetscInt data; 306 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 307 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 308 } 309 #else 310 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 311 #endif 312 #endif 313 #if defined(PETSC_USE_CTABLE) 314 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 315 col = (col - 1)/bs; 316 #else 317 col = (baij->colmap[in[j]] - 1)/bs; 318 #endif 319 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 320 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 321 col = in[j]; 322 } 323 } else col = in[j]; 324 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 325 } 326 } 327 } else { 328 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 329 if (!baij->donotstash) { 330 if (roworiented) { 331 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 332 } else { 333 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 334 } 335 } 336 } 337 } 338 PetscFunctionReturn(0); 339 } 340 341 #undef __FUNCT__ 342 #define __FUNCT__ "MatGetValues_MPISBAIJ" 343 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 344 { 345 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 346 PetscErrorCode ierr; 347 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 348 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 349 350 PetscFunctionBegin; 351 for (i=0; i<m; i++) { 352 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 353 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 354 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 355 row = idxm[i] - bsrstart; 356 for (j=0; j<n; j++) { 357 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 358 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 359 if (idxn[j] >= bscstart && idxn[j] < bscend) { 360 col = idxn[j] - bscstart; 361 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 362 } else { 363 if (!baij->colmap) { 364 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 365 } 366 #if defined(PETSC_USE_CTABLE) 367 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 368 data--; 369 #else 370 data = baij->colmap[idxn[j]/bs]-1; 371 #endif 372 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 373 else { 374 col = data + idxn[j]%bs; 375 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 376 } 377 } 378 } 379 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 380 } 381 PetscFunctionReturn(0); 382 } 383 384 #undef __FUNCT__ 385 #define __FUNCT__ "MatNorm_MPISBAIJ" 386 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 387 { 388 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 389 PetscErrorCode ierr; 390 PetscReal sum[2],*lnorm2; 391 392 PetscFunctionBegin; 393 if (baij->size == 1) { 394 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 395 } else { 396 if (type == NORM_FROBENIUS) { 397 ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr); 398 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 399 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 400 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 401 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 402 ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 403 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 404 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 405 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 406 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 407 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 408 PetscReal *rsum,*rsum2,vabs; 409 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 410 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 411 MatScalar *v; 412 413 ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr); 414 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 415 /* Amat */ 416 v = amat->a; jj = amat->j; 417 for (brow=0; brow<mbs; brow++) { 418 grow = bs*(rstart + brow); 419 nz = amat->i[brow+1] - amat->i[brow]; 420 for (bcol=0; bcol<nz; bcol++) { 421 gcol = bs*(rstart + *jj); jj++; 422 for (col=0; col<bs; col++) { 423 for (row=0; row<bs; row++) { 424 vabs = PetscAbsScalar(*v); v++; 425 rsum[gcol+col] += vabs; 426 /* non-diagonal block */ 427 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 428 } 429 } 430 } 431 } 432 /* Bmat */ 433 v = bmat->a; jj = bmat->j; 434 for (brow=0; brow<mbs; brow++) { 435 grow = bs*(rstart + brow); 436 nz = bmat->i[brow+1] - bmat->i[brow]; 437 for (bcol=0; bcol<nz; bcol++) { 438 gcol = bs*garray[*jj]; jj++; 439 for (col=0; col<bs; col++) { 440 for (row=0; row<bs; row++) { 441 vabs = PetscAbsScalar(*v); v++; 442 rsum[gcol+col] += vabs; 443 rsum[grow+row] += vabs; 444 } 445 } 446 } 447 } 448 ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 449 *norm = 0.0; 450 for (col=0; col<mat->cmap->N; col++) { 451 if (rsum2[col] > *norm) *norm = rsum2[col]; 452 } 453 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 454 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 455 } 456 PetscFunctionReturn(0); 457 } 458 459 #undef __FUNCT__ 460 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 461 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 462 { 463 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 464 PetscErrorCode ierr; 465 PetscInt nstash,reallocs; 466 467 PetscFunctionBegin; 468 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 469 470 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 471 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 472 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 473 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 474 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 475 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 481 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 482 { 483 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 484 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data; 485 PetscErrorCode ierr; 486 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 487 PetscInt *row,*col; 488 PetscBool other_disassembled; 489 PetscMPIInt n; 490 PetscBool r1,r2,r3; 491 MatScalar *val; 492 493 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 494 PetscFunctionBegin; 495 if (!baij->donotstash && !mat->nooffprocentries) { 496 while (1) { 497 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 498 if (!flg) break; 499 500 for (i=0; i<n;) { 501 /* Now identify the consecutive vals belonging to the same row */ 502 for (j=i,rstart=row[j]; j<n; j++) { 503 if (row[j] != rstart) break; 504 } 505 if (j < n) ncols = j-i; 506 else ncols = n-i; 507 /* Now assemble all these values with a single function call */ 508 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 509 i = j; 510 } 511 } 512 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 513 /* Now process the block-stash. Since the values are stashed column-oriented, 514 set the roworiented flag to column oriented, and after MatSetValues() 515 restore the original flags */ 516 r1 = baij->roworiented; 517 r2 = a->roworiented; 518 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 519 520 baij->roworiented = PETSC_FALSE; 521 a->roworiented = PETSC_FALSE; 522 523 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 524 while (1) { 525 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 526 if (!flg) break; 527 528 for (i=0; i<n;) { 529 /* Now identify the consecutive vals belonging to the same row */ 530 for (j=i,rstart=row[j]; j<n; j++) { 531 if (row[j] != rstart) break; 532 } 533 if (j < n) ncols = j-i; 534 else ncols = n-i; 535 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr); 536 i = j; 537 } 538 } 539 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 540 541 baij->roworiented = r1; 542 a->roworiented = r2; 543 544 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 545 } 546 547 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 548 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 549 550 /* determine if any processor has disassembled, if so we must 551 also disassemble ourselfs, in order that we may reassemble. */ 552 /* 553 if nonzero structure of submatrix B cannot change then we know that 554 no processor disassembled thus we can skip this stuff 555 */ 556 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 557 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 558 if (mat->was_assembled && !other_disassembled) { 559 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 560 } 561 } 562 563 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 564 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 565 } 566 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 567 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 568 569 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 570 571 baij->rowvalues = 0; 572 573 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 574 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 575 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 576 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 577 } 578 PetscFunctionReturn(0); 579 } 580 581 extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer); 582 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 583 #include <petscdraw.h> 584 #undef __FUNCT__ 585 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 586 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 587 { 588 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 589 PetscErrorCode ierr; 590 PetscInt bs = mat->rmap->bs; 591 PetscMPIInt rank = baij->rank; 592 PetscBool iascii,isdraw; 593 PetscViewer sviewer; 594 PetscViewerFormat format; 595 596 PetscFunctionBegin; 597 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 598 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 599 if (iascii) { 600 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 601 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 602 MatInfo info; 603 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 604 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 605 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 606 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 607 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 608 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 609 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 610 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 611 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 612 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 613 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 614 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 615 PetscFunctionReturn(0); 616 } else if (format == PETSC_VIEWER_ASCII_INFO) { 617 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 618 PetscFunctionReturn(0); 619 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 620 PetscFunctionReturn(0); 621 } 622 } 623 624 if (isdraw) { 625 PetscDraw draw; 626 PetscBool isnull; 627 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 628 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 629 } 630 631 { 632 /* assemble the entire matrix onto first processor. */ 633 Mat A; 634 Mat_SeqSBAIJ *Aloc; 635 Mat_SeqBAIJ *Bloc; 636 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 637 MatScalar *a; 638 const char *matname; 639 640 /* Should this be the same type as mat? */ 641 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 642 if (!rank) { 643 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 644 } else { 645 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 646 } 647 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 648 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 649 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 650 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 651 652 /* copy over the A part */ 653 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 654 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 655 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 656 657 for (i=0; i<mbs; i++) { 658 rvals[0] = bs*(baij->rstartbs + i); 659 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 660 for (j=ai[i]; j<ai[i+1]; j++) { 661 col = (baij->cstartbs+aj[j])*bs; 662 for (k=0; k<bs; k++) { 663 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 664 col++; 665 a += bs; 666 } 667 } 668 } 669 /* copy over the B part */ 670 Bloc = (Mat_SeqBAIJ*)baij->B->data; 671 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 672 for (i=0; i<mbs; i++) { 673 674 rvals[0] = bs*(baij->rstartbs + i); 675 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 676 for (j=ai[i]; j<ai[i+1]; j++) { 677 col = baij->garray[aj[j]]*bs; 678 for (k=0; k<bs; k++) { 679 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 680 col++; 681 a += bs; 682 } 683 } 684 } 685 ierr = PetscFree(rvals);CHKERRQ(ierr); 686 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 687 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 688 /* 689 Everyone has to call to draw the matrix since the graphics waits are 690 synchronized across all processors that share the PetscDraw object 691 */ 692 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 693 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 694 if (!rank) { 695 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 696 ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 697 } 698 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 699 ierr = MatDestroy(&A);CHKERRQ(ierr); 700 } 701 PetscFunctionReturn(0); 702 } 703 704 #undef __FUNCT__ 705 #define __FUNCT__ "MatView_MPISBAIJ" 706 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 707 { 708 PetscErrorCode ierr; 709 PetscBool iascii,isdraw,issocket,isbinary; 710 711 PetscFunctionBegin; 712 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 713 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 714 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 715 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 716 if (iascii || isdraw || issocket || isbinary) { 717 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 718 } 719 PetscFunctionReturn(0); 720 } 721 722 #undef __FUNCT__ 723 #define __FUNCT__ "MatDestroy_MPISBAIJ" 724 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 725 { 726 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 727 PetscErrorCode ierr; 728 729 PetscFunctionBegin; 730 #if defined(PETSC_USE_LOG) 731 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 732 #endif 733 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 734 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 735 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 736 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 737 #if defined(PETSC_USE_CTABLE) 738 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 739 #else 740 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 741 #endif 742 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 743 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 744 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 745 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 746 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 747 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 748 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 749 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 750 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 751 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 752 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 753 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 754 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 755 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 756 ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 757 #if defined(PETSC_USE_REAL_MAT_SINGLE) 758 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 759 #endif 760 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 761 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 762 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 763 ierr = PetscFree(mat->data);CHKERRQ(ierr); 764 765 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 766 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 767 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 768 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 769 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 770 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr); 771 PetscFunctionReturn(0); 772 } 773 774 #undef __FUNCT__ 775 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian" 776 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 777 { 778 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 779 PetscErrorCode ierr; 780 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 781 PetscScalar *x,*from; 782 783 PetscFunctionBegin; 784 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 785 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 786 787 /* diagonal part */ 788 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 789 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 790 791 /* subdiagonal part */ 792 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 793 794 /* copy x into the vec slvec0 */ 795 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 796 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 797 798 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 799 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 800 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 801 802 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 803 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 804 /* supperdiagonal part */ 805 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 806 PetscFunctionReturn(0); 807 } 808 809 #undef __FUNCT__ 810 #define __FUNCT__ "MatMult_MPISBAIJ" 811 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 812 { 813 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 814 PetscErrorCode ierr; 815 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 816 PetscScalar *x,*from; 817 818 PetscFunctionBegin; 819 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 820 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 821 822 /* diagonal part */ 823 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 824 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 825 826 /* subdiagonal part */ 827 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 828 829 /* copy x into the vec slvec0 */ 830 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 831 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 832 833 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 834 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 835 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 836 837 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 838 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 839 /* supperdiagonal part */ 840 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 841 PetscFunctionReturn(0); 842 } 843 844 #undef __FUNCT__ 845 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 846 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 847 { 848 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 849 PetscErrorCode ierr; 850 PetscInt nt; 851 852 PetscFunctionBegin; 853 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 854 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 855 856 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 857 if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 858 859 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 860 /* do diagonal part */ 861 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 862 /* do supperdiagonal part */ 863 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 864 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 865 /* do subdiagonal part */ 866 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 867 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 868 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 869 PetscFunctionReturn(0); 870 } 871 872 #undef __FUNCT__ 873 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 874 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 875 { 876 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 877 PetscErrorCode ierr; 878 PetscInt mbs=a->mbs,bs=A->rmap->bs; 879 PetscScalar *x,*from,zero=0.0; 880 881 PetscFunctionBegin; 882 /* 883 PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n"); 884 PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT); 885 */ 886 /* diagonal part */ 887 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 888 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 889 890 /* subdiagonal part */ 891 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 892 893 /* copy x into the vec slvec0 */ 894 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 895 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 896 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 897 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 898 899 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 900 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 901 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 902 903 /* supperdiagonal part */ 904 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 905 PetscFunctionReturn(0); 906 } 907 908 #undef __FUNCT__ 909 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 910 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 911 { 912 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 913 PetscErrorCode ierr; 914 915 PetscFunctionBegin; 916 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 917 /* do diagonal part */ 918 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 919 /* do supperdiagonal part */ 920 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 921 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 922 923 /* do subdiagonal part */ 924 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 925 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 926 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 927 PetscFunctionReturn(0); 928 } 929 930 /* 931 This only works correctly for square matrices where the subblock A->A is the 932 diagonal block 933 */ 934 #undef __FUNCT__ 935 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 936 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 937 { 938 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 939 PetscErrorCode ierr; 940 941 PetscFunctionBegin; 942 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 943 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 944 PetscFunctionReturn(0); 945 } 946 947 #undef __FUNCT__ 948 #define __FUNCT__ "MatScale_MPISBAIJ" 949 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 950 { 951 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 952 PetscErrorCode ierr; 953 954 PetscFunctionBegin; 955 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 956 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 957 PetscFunctionReturn(0); 958 } 959 960 #undef __FUNCT__ 961 #define __FUNCT__ "MatGetRow_MPISBAIJ" 962 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 963 { 964 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 965 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 966 PetscErrorCode ierr; 967 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 968 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 969 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 970 971 PetscFunctionBegin; 972 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 973 mat->getrowactive = PETSC_TRUE; 974 975 if (!mat->rowvalues && (idx || v)) { 976 /* 977 allocate enough space to hold information from the longest row. 978 */ 979 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 980 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 981 PetscInt max = 1,mbs = mat->mbs,tmp; 982 for (i=0; i<mbs; i++) { 983 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 984 if (max < tmp) max = tmp; 985 } 986 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 987 } 988 989 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 990 lrow = row - brstart; /* local row index */ 991 992 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 993 if (!v) {pvA = 0; pvB = 0;} 994 if (!idx) {pcA = 0; if (!v) pcB = 0;} 995 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 996 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 997 nztot = nzA + nzB; 998 999 cmap = mat->garray; 1000 if (v || idx) { 1001 if (nztot) { 1002 /* Sort by increasing column numbers, assuming A and B already sorted */ 1003 PetscInt imark = -1; 1004 if (v) { 1005 *v = v_p = mat->rowvalues; 1006 for (i=0; i<nzB; i++) { 1007 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1008 else break; 1009 } 1010 imark = i; 1011 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1012 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1013 } 1014 if (idx) { 1015 *idx = idx_p = mat->rowindices; 1016 if (imark > -1) { 1017 for (i=0; i<imark; i++) { 1018 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1019 } 1020 } else { 1021 for (i=0; i<nzB; i++) { 1022 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1023 else break; 1024 } 1025 imark = i; 1026 } 1027 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1028 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1029 } 1030 } else { 1031 if (idx) *idx = 0; 1032 if (v) *v = 0; 1033 } 1034 } 1035 *nz = nztot; 1036 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1037 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1038 PetscFunctionReturn(0); 1039 } 1040 1041 #undef __FUNCT__ 1042 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1043 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1044 { 1045 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1046 1047 PetscFunctionBegin; 1048 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1049 baij->getrowactive = PETSC_FALSE; 1050 PetscFunctionReturn(0); 1051 } 1052 1053 #undef __FUNCT__ 1054 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1055 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1056 { 1057 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1058 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1059 1060 PetscFunctionBegin; 1061 aA->getrow_utriangular = PETSC_TRUE; 1062 PetscFunctionReturn(0); 1063 } 1064 #undef __FUNCT__ 1065 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1066 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1067 { 1068 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1069 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1070 1071 PetscFunctionBegin; 1072 aA->getrow_utriangular = PETSC_FALSE; 1073 PetscFunctionReturn(0); 1074 } 1075 1076 #undef __FUNCT__ 1077 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1078 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1079 { 1080 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1081 PetscErrorCode ierr; 1082 1083 PetscFunctionBegin; 1084 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1085 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1086 PetscFunctionReturn(0); 1087 } 1088 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1091 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1092 { 1093 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1094 PetscErrorCode ierr; 1095 1096 PetscFunctionBegin; 1097 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1098 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1104 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1105 { 1106 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1107 PetscErrorCode ierr; 1108 1109 PetscFunctionBegin; 1110 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1111 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1112 PetscFunctionReturn(0); 1113 } 1114 1115 #undef __FUNCT__ 1116 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1117 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1118 { 1119 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1120 Mat A = a->A,B = a->B; 1121 PetscErrorCode ierr; 1122 PetscReal isend[5],irecv[5]; 1123 1124 PetscFunctionBegin; 1125 info->block_size = (PetscReal)matin->rmap->bs; 1126 1127 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1128 1129 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1130 isend[3] = info->memory; isend[4] = info->mallocs; 1131 1132 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1133 1134 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1135 isend[3] += info->memory; isend[4] += info->mallocs; 1136 if (flag == MAT_LOCAL) { 1137 info->nz_used = isend[0]; 1138 info->nz_allocated = isend[1]; 1139 info->nz_unneeded = isend[2]; 1140 info->memory = isend[3]; 1141 info->mallocs = isend[4]; 1142 } else if (flag == MAT_GLOBAL_MAX) { 1143 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1144 1145 info->nz_used = irecv[0]; 1146 info->nz_allocated = irecv[1]; 1147 info->nz_unneeded = irecv[2]; 1148 info->memory = irecv[3]; 1149 info->mallocs = irecv[4]; 1150 } else if (flag == MAT_GLOBAL_SUM) { 1151 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1152 1153 info->nz_used = irecv[0]; 1154 info->nz_allocated = irecv[1]; 1155 info->nz_unneeded = irecv[2]; 1156 info->memory = irecv[3]; 1157 info->mallocs = irecv[4]; 1158 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1159 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1160 info->fill_ratio_needed = 0; 1161 info->factor_mallocs = 0; 1162 PetscFunctionReturn(0); 1163 } 1164 1165 #undef __FUNCT__ 1166 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1167 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1168 { 1169 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1170 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1171 PetscErrorCode ierr; 1172 1173 PetscFunctionBegin; 1174 switch (op) { 1175 case MAT_NEW_NONZERO_LOCATIONS: 1176 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1177 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1178 case MAT_KEEP_NONZERO_PATTERN: 1179 case MAT_NEW_NONZERO_LOCATION_ERR: 1180 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1181 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1182 break; 1183 case MAT_ROW_ORIENTED: 1184 a->roworiented = flg; 1185 1186 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1187 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1188 break; 1189 case MAT_NEW_DIAGONALS: 1190 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1191 break; 1192 case MAT_IGNORE_OFF_PROC_ENTRIES: 1193 a->donotstash = flg; 1194 break; 1195 case MAT_USE_HASH_TABLE: 1196 a->ht_flag = flg; 1197 break; 1198 case MAT_HERMITIAN: 1199 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1200 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1201 1202 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1203 break; 1204 case MAT_SPD: 1205 A->spd_set = PETSC_TRUE; 1206 A->spd = flg; 1207 if (flg) { 1208 A->symmetric = PETSC_TRUE; 1209 A->structurally_symmetric = PETSC_TRUE; 1210 A->symmetric_set = PETSC_TRUE; 1211 A->structurally_symmetric_set = PETSC_TRUE; 1212 } 1213 break; 1214 case MAT_SYMMETRIC: 1215 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1216 break; 1217 case MAT_STRUCTURALLY_SYMMETRIC: 1218 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1219 break; 1220 case MAT_SYMMETRY_ETERNAL: 1221 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1222 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1223 break; 1224 case MAT_IGNORE_LOWER_TRIANGULAR: 1225 aA->ignore_ltriangular = flg; 1226 break; 1227 case MAT_ERROR_LOWER_TRIANGULAR: 1228 aA->ignore_ltriangular = flg; 1229 break; 1230 case MAT_GETROW_UPPERTRIANGULAR: 1231 aA->getrow_utriangular = flg; 1232 break; 1233 default: 1234 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1235 } 1236 PetscFunctionReturn(0); 1237 } 1238 1239 #undef __FUNCT__ 1240 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1241 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1242 { 1243 PetscErrorCode ierr; 1244 1245 PetscFunctionBegin; 1246 if (MAT_INITIAL_MATRIX || *B != A) { 1247 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1248 } 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #undef __FUNCT__ 1253 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1254 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1255 { 1256 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1257 Mat a = baij->A, b=baij->B; 1258 PetscErrorCode ierr; 1259 PetscInt nv,m,n; 1260 PetscBool flg; 1261 1262 PetscFunctionBegin; 1263 if (ll != rr) { 1264 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1265 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1266 } 1267 if (!ll) PetscFunctionReturn(0); 1268 1269 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1270 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1271 1272 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1273 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1274 1275 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1276 1277 /* left diagonalscale the off-diagonal part */ 1278 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1279 1280 /* scale the diagonal part */ 1281 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1282 1283 /* right diagonalscale the off-diagonal part */ 1284 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1285 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1286 PetscFunctionReturn(0); 1287 } 1288 1289 #undef __FUNCT__ 1290 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1291 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1292 { 1293 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1294 PetscErrorCode ierr; 1295 1296 PetscFunctionBegin; 1297 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1298 PetscFunctionReturn(0); 1299 } 1300 1301 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1302 1303 #undef __FUNCT__ 1304 #define __FUNCT__ "MatEqual_MPISBAIJ" 1305 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1306 { 1307 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1308 Mat a,b,c,d; 1309 PetscBool flg; 1310 PetscErrorCode ierr; 1311 1312 PetscFunctionBegin; 1313 a = matA->A; b = matA->B; 1314 c = matB->A; d = matB->B; 1315 1316 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1317 if (flg) { 1318 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1319 } 1320 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1321 PetscFunctionReturn(0); 1322 } 1323 1324 #undef __FUNCT__ 1325 #define __FUNCT__ "MatCopy_MPISBAIJ" 1326 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1327 { 1328 PetscErrorCode ierr; 1329 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1330 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1331 1332 PetscFunctionBegin; 1333 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1334 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1335 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1336 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1337 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1338 } else { 1339 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1340 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1341 } 1342 PetscFunctionReturn(0); 1343 } 1344 1345 #undef __FUNCT__ 1346 #define __FUNCT__ "MatSetUp_MPISBAIJ" 1347 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1348 { 1349 PetscErrorCode ierr; 1350 1351 PetscFunctionBegin; 1352 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1353 PetscFunctionReturn(0); 1354 } 1355 1356 #undef __FUNCT__ 1357 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1358 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1359 { 1360 PetscErrorCode ierr; 1361 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1362 PetscBLASInt bnz,one=1; 1363 Mat_SeqSBAIJ *xa,*ya; 1364 Mat_SeqBAIJ *xb,*yb; 1365 1366 PetscFunctionBegin; 1367 if (str == SAME_NONZERO_PATTERN) { 1368 PetscScalar alpha = a; 1369 xa = (Mat_SeqSBAIJ*)xx->A->data; 1370 ya = (Mat_SeqSBAIJ*)yy->A->data; 1371 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1372 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1373 xb = (Mat_SeqBAIJ*)xx->B->data; 1374 yb = (Mat_SeqBAIJ*)yy->B->data; 1375 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1376 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1377 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1378 } else { 1379 Mat B; 1380 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1381 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1382 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1383 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 1384 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1385 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1386 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1387 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1388 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1389 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1390 ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 1391 ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1392 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1393 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1394 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1395 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 1396 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1397 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1398 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1399 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1400 } 1401 PetscFunctionReturn(0); 1402 } 1403 1404 #undef __FUNCT__ 1405 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1406 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1407 { 1408 PetscErrorCode ierr; 1409 PetscInt i; 1410 PetscBool flg; 1411 1412 PetscFunctionBegin; 1413 for (i=0; i<n; i++) { 1414 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1415 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1416 } 1417 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1418 PetscFunctionReturn(0); 1419 } 1420 1421 1422 /* -------------------------------------------------------------------*/ 1423 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1424 MatGetRow_MPISBAIJ, 1425 MatRestoreRow_MPISBAIJ, 1426 MatMult_MPISBAIJ, 1427 /* 4*/ MatMultAdd_MPISBAIJ, 1428 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1429 MatMultAdd_MPISBAIJ, 1430 0, 1431 0, 1432 0, 1433 /* 10*/ 0, 1434 0, 1435 0, 1436 MatSOR_MPISBAIJ, 1437 MatTranspose_MPISBAIJ, 1438 /* 15*/ MatGetInfo_MPISBAIJ, 1439 MatEqual_MPISBAIJ, 1440 MatGetDiagonal_MPISBAIJ, 1441 MatDiagonalScale_MPISBAIJ, 1442 MatNorm_MPISBAIJ, 1443 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1444 MatAssemblyEnd_MPISBAIJ, 1445 MatSetOption_MPISBAIJ, 1446 MatZeroEntries_MPISBAIJ, 1447 /* 24*/ 0, 1448 0, 1449 0, 1450 0, 1451 0, 1452 /* 29*/ MatSetUp_MPISBAIJ, 1453 0, 1454 0, 1455 0, 1456 0, 1457 /* 34*/ MatDuplicate_MPISBAIJ, 1458 0, 1459 0, 1460 0, 1461 0, 1462 /* 39*/ MatAXPY_MPISBAIJ, 1463 MatGetSubMatrices_MPISBAIJ, 1464 MatIncreaseOverlap_MPISBAIJ, 1465 MatGetValues_MPISBAIJ, 1466 MatCopy_MPISBAIJ, 1467 /* 44*/ 0, 1468 MatScale_MPISBAIJ, 1469 0, 1470 0, 1471 0, 1472 /* 49*/ 0, 1473 0, 1474 0, 1475 0, 1476 0, 1477 /* 54*/ 0, 1478 0, 1479 MatSetUnfactored_MPISBAIJ, 1480 0, 1481 MatSetValuesBlocked_MPISBAIJ, 1482 /* 59*/ 0, 1483 0, 1484 0, 1485 0, 1486 0, 1487 /* 64*/ 0, 1488 0, 1489 0, 1490 0, 1491 0, 1492 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1493 0, 1494 0, 1495 0, 1496 0, 1497 /* 74*/ 0, 1498 0, 1499 0, 1500 0, 1501 0, 1502 /* 79*/ 0, 1503 0, 1504 0, 1505 0, 1506 MatLoad_MPISBAIJ, 1507 /* 84*/ 0, 1508 0, 1509 0, 1510 0, 1511 0, 1512 /* 89*/ 0, 1513 0, 1514 0, 1515 0, 1516 0, 1517 /* 94*/ 0, 1518 0, 1519 0, 1520 0, 1521 0, 1522 /* 99*/ 0, 1523 0, 1524 0, 1525 0, 1526 0, 1527 /*104*/ 0, 1528 MatRealPart_MPISBAIJ, 1529 MatImaginaryPart_MPISBAIJ, 1530 MatGetRowUpperTriangular_MPISBAIJ, 1531 MatRestoreRowUpperTriangular_MPISBAIJ, 1532 /*109*/ 0, 1533 0, 1534 0, 1535 0, 1536 0, 1537 /*114*/ 0, 1538 0, 1539 0, 1540 0, 1541 0, 1542 /*119*/ 0, 1543 0, 1544 0, 1545 0, 1546 0, 1547 /*124*/ 0, 1548 0, 1549 0, 1550 0, 1551 0, 1552 /*129*/ 0, 1553 0, 1554 0, 1555 0, 1556 0, 1557 /*134*/ 0, 1558 0, 1559 0, 1560 0, 1561 0, 1562 /*139*/ 0, 1563 0, 1564 0 1565 }; 1566 1567 1568 #undef __FUNCT__ 1569 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1570 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1571 { 1572 PetscFunctionBegin; 1573 *a = ((Mat_MPISBAIJ*)A->data)->A; 1574 PetscFunctionReturn(0); 1575 } 1576 1577 #undef __FUNCT__ 1578 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1579 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 1580 { 1581 Mat_MPISBAIJ *b; 1582 PetscErrorCode ierr; 1583 PetscInt i,mbs,Mbs; 1584 1585 PetscFunctionBegin; 1586 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 1587 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1588 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1589 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1590 1591 b = (Mat_MPISBAIJ*)B->data; 1592 mbs = B->rmap->n/bs; 1593 Mbs = B->rmap->N/bs; 1594 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1595 1596 B->rmap->bs = bs; 1597 b->bs2 = bs*bs; 1598 b->mbs = mbs; 1599 b->nbs = mbs; 1600 b->Mbs = Mbs; 1601 b->Nbs = Mbs; 1602 1603 for (i=0; i<=b->size; i++) { 1604 b->rangebs[i] = B->rmap->range[i]/bs; 1605 } 1606 b->rstartbs = B->rmap->rstart/bs; 1607 b->rendbs = B->rmap->rend/bs; 1608 1609 b->cstartbs = B->cmap->rstart/bs; 1610 b->cendbs = B->cmap->rend/bs; 1611 1612 if (!B->preallocated) { 1613 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1614 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1615 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1616 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 1617 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1618 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1619 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1620 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 1621 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 1622 } 1623 1624 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1625 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1626 1627 B->preallocated = PETSC_TRUE; 1628 PetscFunctionReturn(0); 1629 } 1630 1631 #undef __FUNCT__ 1632 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ" 1633 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1634 { 1635 PetscInt m,rstart,cstart,cend; 1636 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 1637 const PetscInt *JJ =0; 1638 PetscScalar *values=0; 1639 PetscErrorCode ierr; 1640 1641 PetscFunctionBegin; 1642 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 1643 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 1644 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 1645 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1646 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1647 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1648 m = B->rmap->n/bs; 1649 rstart = B->rmap->rstart/bs; 1650 cstart = B->cmap->rstart/bs; 1651 cend = B->cmap->rend/bs; 1652 1653 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 1654 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 1655 for (i=0; i<m; i++) { 1656 nz = ii[i+1] - ii[i]; 1657 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 1658 nz_max = PetscMax(nz_max,nz); 1659 JJ = jj + ii[i]; 1660 for (j=0; j<nz; j++) { 1661 if (*JJ >= cstart) break; 1662 JJ++; 1663 } 1664 d = 0; 1665 for (; j<nz; j++) { 1666 if (*JJ++ >= cend) break; 1667 d++; 1668 } 1669 d_nnz[i] = d; 1670 o_nnz[i] = nz - d; 1671 } 1672 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1673 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 1674 1675 values = (PetscScalar*)V; 1676 if (!values) { 1677 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 1678 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 1679 } 1680 for (i=0; i<m; i++) { 1681 PetscInt row = i + rstart; 1682 PetscInt ncols = ii[i+1] - ii[i]; 1683 const PetscInt *icols = jj + ii[i]; 1684 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 1685 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 1686 } 1687 1688 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 1689 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1690 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1691 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1692 PetscFunctionReturn(0); 1693 } 1694 1695 #if defined(PETSC_HAVE_MUMPS) 1696 PETSC_EXTERN PetscErrorCode MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*); 1697 #endif 1698 #if defined(PETSC_HAVE_PASTIX) 1699 PETSC_EXTERN PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*); 1700 #endif 1701 1702 /*MC 1703 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1704 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 1705 the matrix is stored. 1706 1707 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 1708 can call MatSetOption(Mat, MAT_HERMITIAN); 1709 1710 Options Database Keys: 1711 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1712 1713 Level: beginner 1714 1715 .seealso: MatCreateMPISBAIJ 1716 M*/ 1717 1718 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*); 1719 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "MatCreate_MPISBAIJ" 1722 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 1723 { 1724 Mat_MPISBAIJ *b; 1725 PetscErrorCode ierr; 1726 PetscBool flg; 1727 1728 PetscFunctionBegin; 1729 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 1730 B->data = (void*)b; 1731 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1732 1733 B->ops->destroy = MatDestroy_MPISBAIJ; 1734 B->ops->view = MatView_MPISBAIJ; 1735 B->assembled = PETSC_FALSE; 1736 B->insertmode = NOT_SET_VALUES; 1737 1738 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 1739 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 1740 1741 /* build local table of row and column ownerships */ 1742 ierr = PetscMalloc1((b->size+2),&b->rangebs);CHKERRQ(ierr); 1743 1744 /* build cache for off array entries formed */ 1745 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 1746 1747 b->donotstash = PETSC_FALSE; 1748 b->colmap = NULL; 1749 b->garray = NULL; 1750 b->roworiented = PETSC_TRUE; 1751 1752 /* stuff used in block assembly */ 1753 b->barray = 0; 1754 1755 /* stuff used for matrix vector multiply */ 1756 b->lvec = 0; 1757 b->Mvctx = 0; 1758 b->slvec0 = 0; 1759 b->slvec0b = 0; 1760 b->slvec1 = 0; 1761 b->slvec1a = 0; 1762 b->slvec1b = 0; 1763 b->sMvctx = 0; 1764 1765 /* stuff for MatGetRow() */ 1766 b->rowindices = 0; 1767 b->rowvalues = 0; 1768 b->getrowactive = PETSC_FALSE; 1769 1770 /* hash table stuff */ 1771 b->ht = 0; 1772 b->hd = 0; 1773 b->ht_size = 0; 1774 b->ht_flag = PETSC_FALSE; 1775 b->ht_fact = 0; 1776 b->ht_total_ct = 0; 1777 b->ht_insert_ct = 0; 1778 1779 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 1780 b->ijonly = PETSC_FALSE; 1781 1782 b->in_loc = 0; 1783 b->v_loc = 0; 1784 b->n_loc = 0; 1785 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 1786 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr); 1787 if (flg) { 1788 PetscReal fact = 1.39; 1789 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 1790 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 1791 if (fact <= 1.0) fact = 1.39; 1792 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1793 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 1794 } 1795 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1796 1797 #if defined(PETSC_HAVE_PASTIX) 1798 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr); 1799 #endif 1800 #if defined(PETSC_HAVE_MUMPS) 1801 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 1802 #endif 1803 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1804 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1805 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1806 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1807 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 1808 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr); 1809 1810 B->symmetric = PETSC_TRUE; 1811 B->structurally_symmetric = PETSC_TRUE; 1812 B->symmetric_set = PETSC_TRUE; 1813 B->structurally_symmetric_set = PETSC_TRUE; 1814 1815 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 1816 PetscFunctionReturn(0); 1817 } 1818 1819 /*MC 1820 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1821 1822 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1823 and MATMPISBAIJ otherwise. 1824 1825 Options Database Keys: 1826 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1827 1828 Level: beginner 1829 1830 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1831 M*/ 1832 1833 #undef __FUNCT__ 1834 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1835 /*@C 1836 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1837 the user should preallocate the matrix storage by setting the parameters 1838 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1839 performance can be increased by more than a factor of 50. 1840 1841 Collective on Mat 1842 1843 Input Parameters: 1844 + B - the matrix 1845 . bs - size of blockk 1846 . d_nz - number of block nonzeros per block row in diagonal portion of local 1847 submatrix (same for all local rows) 1848 . d_nnz - array containing the number of block nonzeros in the various block rows 1849 in the upper triangular and diagonal part of the in diagonal portion of the local 1850 (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 1851 for the diagonal entry and set a value even if it is zero. 1852 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1853 submatrix (same for all local rows). 1854 - o_nnz - array containing the number of nonzeros in the various block rows of the 1855 off-diagonal portion of the local submatrix that is right of the diagonal 1856 (possibly different for each block row) or NULL. 1857 1858 1859 Options Database Keys: 1860 . -mat_no_unroll - uses code that does not unroll the loops in the 1861 block calculations (much slower) 1862 . -mat_block_size - size of the blocks to use 1863 1864 Notes: 1865 1866 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1867 than it must be used on all processors that share the object for that argument. 1868 1869 If the *_nnz parameter is given then the *_nz parameter is ignored 1870 1871 Storage Information: 1872 For a square global matrix we define each processor's diagonal portion 1873 to be its local rows and the corresponding columns (a square submatrix); 1874 each processor's off-diagonal portion encompasses the remainder of the 1875 local matrix (a rectangular submatrix). 1876 1877 The user can specify preallocated storage for the diagonal part of 1878 the local submatrix with either d_nz or d_nnz (not both). Set 1879 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 1880 memory allocation. Likewise, specify preallocated storage for the 1881 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1882 1883 You can call MatGetInfo() to get information on how effective the preallocation was; 1884 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 1885 You can also run with the option -info and look for messages with the string 1886 malloc in them to see if additional memory allocation was needed. 1887 1888 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1889 the figure below we depict these three local rows and all columns (0-11). 1890 1891 .vb 1892 0 1 2 3 4 5 6 7 8 9 10 11 1893 -------------------------- 1894 row 3 |. . . d d d o o o o o o 1895 row 4 |. . . d d d o o o o o o 1896 row 5 |. . . d d d o o o o o o 1897 -------------------------- 1898 .ve 1899 1900 Thus, any entries in the d locations are stored in the d (diagonal) 1901 submatrix, and any entries in the o locations are stored in the 1902 o (off-diagonal) submatrix. Note that the d matrix is stored in 1903 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 1904 1905 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 1906 plus the diagonal part of the d matrix, 1907 and o_nz should indicate the number of block nonzeros per row in the o matrix 1908 1909 In general, for PDE problems in which most nonzeros are near the diagonal, 1910 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1911 or you will get TERRIBLE performance; see the users' manual chapter on 1912 matrices. 1913 1914 Level: intermediate 1915 1916 .keywords: matrix, block, aij, compressed row, sparse, parallel 1917 1918 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 1919 @*/ 1920 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1921 { 1922 PetscErrorCode ierr; 1923 1924 PetscFunctionBegin; 1925 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 1926 PetscValidType(B,1); 1927 PetscValidLogicalCollectiveInt(B,bs,2); 1928 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 1929 PetscFunctionReturn(0); 1930 } 1931 1932 #undef __FUNCT__ 1933 #define __FUNCT__ "MatCreateSBAIJ" 1934 /*@C 1935 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1936 (block compressed row). For good matrix assembly performance 1937 the user should preallocate the matrix storage by setting the parameters 1938 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1939 performance can be increased by more than a factor of 50. 1940 1941 Collective on MPI_Comm 1942 1943 Input Parameters: 1944 + comm - MPI communicator 1945 . bs - size of blockk 1946 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1947 This value should be the same as the local size used in creating the 1948 y vector for the matrix-vector product y = Ax. 1949 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1950 This value should be the same as the local size used in creating the 1951 x vector for the matrix-vector product y = Ax. 1952 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1953 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1954 . d_nz - number of block nonzeros per block row in diagonal portion of local 1955 submatrix (same for all local rows) 1956 . d_nnz - array containing the number of block nonzeros in the various block rows 1957 in the upper triangular portion of the in diagonal portion of the local 1958 (possibly different for each block block row) or NULL. 1959 If you plan to factor the matrix you must leave room for the diagonal entry and 1960 set its value even if it is zero. 1961 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1962 submatrix (same for all local rows). 1963 - o_nnz - array containing the number of nonzeros in the various block rows of the 1964 off-diagonal portion of the local submatrix (possibly different for 1965 each block row) or NULL. 1966 1967 Output Parameter: 1968 . A - the matrix 1969 1970 Options Database Keys: 1971 . -mat_no_unroll - uses code that does not unroll the loops in the 1972 block calculations (much slower) 1973 . -mat_block_size - size of the blocks to use 1974 . -mat_mpi - use the parallel matrix data structures even on one processor 1975 (defaults to using SeqBAIJ format on one processor) 1976 1977 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 1978 MatXXXXSetPreallocation() paradgm instead of this routine directly. 1979 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 1980 1981 Notes: 1982 The number of rows and columns must be divisible by blocksize. 1983 This matrix type does not support complex Hermitian operation. 1984 1985 The user MUST specify either the local or global matrix dimensions 1986 (possibly both). 1987 1988 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1989 than it must be used on all processors that share the object for that argument. 1990 1991 If the *_nnz parameter is given then the *_nz parameter is ignored 1992 1993 Storage Information: 1994 For a square global matrix we define each processor's diagonal portion 1995 to be its local rows and the corresponding columns (a square submatrix); 1996 each processor's off-diagonal portion encompasses the remainder of the 1997 local matrix (a rectangular submatrix). 1998 1999 The user can specify preallocated storage for the diagonal part of 2000 the local submatrix with either d_nz or d_nnz (not both). Set 2001 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2002 memory allocation. Likewise, specify preallocated storage for the 2003 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2004 2005 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2006 the figure below we depict these three local rows and all columns (0-11). 2007 2008 .vb 2009 0 1 2 3 4 5 6 7 8 9 10 11 2010 -------------------------- 2011 row 3 |. . . d d d o o o o o o 2012 row 4 |. . . d d d o o o o o o 2013 row 5 |. . . d d d o o o o o o 2014 -------------------------- 2015 .ve 2016 2017 Thus, any entries in the d locations are stored in the d (diagonal) 2018 submatrix, and any entries in the o locations are stored in the 2019 o (off-diagonal) submatrix. Note that the d matrix is stored in 2020 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2021 2022 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2023 plus the diagonal part of the d matrix, 2024 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2025 In general, for PDE problems in which most nonzeros are near the diagonal, 2026 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2027 or you will get TERRIBLE performance; see the users' manual chapter on 2028 matrices. 2029 2030 Level: intermediate 2031 2032 .keywords: matrix, block, aij, compressed row, sparse, parallel 2033 2034 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2035 @*/ 2036 2037 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2038 { 2039 PetscErrorCode ierr; 2040 PetscMPIInt size; 2041 2042 PetscFunctionBegin; 2043 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2044 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2045 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2046 if (size > 1) { 2047 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2048 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2049 } else { 2050 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2051 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2052 } 2053 PetscFunctionReturn(0); 2054 } 2055 2056 2057 #undef __FUNCT__ 2058 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2059 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2060 { 2061 Mat mat; 2062 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2063 PetscErrorCode ierr; 2064 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2065 PetscScalar *array; 2066 2067 PetscFunctionBegin; 2068 *newmat = 0; 2069 2070 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2071 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2072 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2073 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2074 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2075 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2076 2077 mat->factortype = matin->factortype; 2078 mat->preallocated = PETSC_TRUE; 2079 mat->assembled = PETSC_TRUE; 2080 mat->insertmode = NOT_SET_VALUES; 2081 2082 a = (Mat_MPISBAIJ*)mat->data; 2083 a->bs2 = oldmat->bs2; 2084 a->mbs = oldmat->mbs; 2085 a->nbs = oldmat->nbs; 2086 a->Mbs = oldmat->Mbs; 2087 a->Nbs = oldmat->Nbs; 2088 2089 2090 a->size = oldmat->size; 2091 a->rank = oldmat->rank; 2092 a->donotstash = oldmat->donotstash; 2093 a->roworiented = oldmat->roworiented; 2094 a->rowindices = 0; 2095 a->rowvalues = 0; 2096 a->getrowactive = PETSC_FALSE; 2097 a->barray = 0; 2098 a->rstartbs = oldmat->rstartbs; 2099 a->rendbs = oldmat->rendbs; 2100 a->cstartbs = oldmat->cstartbs; 2101 a->cendbs = oldmat->cendbs; 2102 2103 /* hash table stuff */ 2104 a->ht = 0; 2105 a->hd = 0; 2106 a->ht_size = 0; 2107 a->ht_flag = oldmat->ht_flag; 2108 a->ht_fact = oldmat->ht_fact; 2109 a->ht_total_ct = 0; 2110 a->ht_insert_ct = 0; 2111 2112 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2113 if (oldmat->colmap) { 2114 #if defined(PETSC_USE_CTABLE) 2115 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2116 #else 2117 ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr); 2118 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2119 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2120 #endif 2121 } else a->colmap = 0; 2122 2123 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2124 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2125 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2126 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2127 } else a->garray = 0; 2128 2129 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2130 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2131 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2132 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2133 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2134 2135 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2136 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2137 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2138 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2139 2140 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2141 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2142 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2143 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2144 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2145 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2146 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2147 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2148 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2149 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2150 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 2151 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 2152 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2153 2154 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2155 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2156 a->sMvctx = oldmat->sMvctx; 2157 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 2158 2159 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2160 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2161 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2162 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2163 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2164 *newmat = mat; 2165 PetscFunctionReturn(0); 2166 } 2167 2168 #undef __FUNCT__ 2169 #define __FUNCT__ "MatLoad_MPISBAIJ" 2170 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2171 { 2172 PetscErrorCode ierr; 2173 PetscInt i,nz,j,rstart,rend; 2174 PetscScalar *vals,*buf; 2175 MPI_Comm comm; 2176 MPI_Status status; 2177 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2178 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2179 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2180 PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows; 2181 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2182 PetscInt dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols; 2183 int fd; 2184 2185 PetscFunctionBegin; 2186 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2187 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2188 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2189 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2190 if (bs < 0) bs = 1; 2191 2192 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2193 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2194 if (!rank) { 2195 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2196 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2197 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2198 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2199 } 2200 2201 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 2202 2203 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2204 M = header[1]; 2205 N = header[2]; 2206 2207 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2208 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 2209 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 2210 2211 /* If global sizes are set, check if they are consistent with that given in the file */ 2212 if (sizesset) { 2213 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 2214 } 2215 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2216 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2217 2218 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2219 2220 /* 2221 This code adds extra rows to make sure the number of rows is 2222 divisible by the blocksize 2223 */ 2224 Mbs = M/bs; 2225 extra_rows = bs - M + bs*(Mbs); 2226 if (extra_rows == bs) extra_rows = 0; 2227 else Mbs++; 2228 if (extra_rows &&!rank) { 2229 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2230 } 2231 2232 /* determine ownership of all rows */ 2233 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2234 mbs = Mbs/size + ((Mbs % size) > rank); 2235 m = mbs*bs; 2236 } else { /* User Set */ 2237 m = newmat->rmap->n; 2238 mbs = m/bs; 2239 } 2240 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 2241 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2242 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2243 rowners[0] = 0; 2244 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2245 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2246 rstart = rowners[rank]; 2247 rend = rowners[rank+1]; 2248 2249 /* distribute row lengths to all processors */ 2250 ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 2251 if (!rank) { 2252 ierr = PetscMalloc1((M+extra_rows),&rowlengths);CHKERRQ(ierr); 2253 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2254 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2255 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 2256 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2257 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2258 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2259 } else { 2260 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2261 } 2262 2263 if (!rank) { /* procs[0] */ 2264 /* calculate the number of nonzeros on each processor */ 2265 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 2266 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2267 for (i=0; i<size; i++) { 2268 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2269 procsnz[i] += rowlengths[j]; 2270 } 2271 } 2272 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2273 2274 /* determine max buffer needed and allocate it */ 2275 maxnz = 0; 2276 for (i=0; i<size; i++) { 2277 maxnz = PetscMax(maxnz,procsnz[i]); 2278 } 2279 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2280 2281 /* read in my part of the matrix column indices */ 2282 nz = procsnz[0]; 2283 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2284 mycols = ibuf; 2285 if (size == 1) nz -= extra_rows; 2286 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2287 if (size == 1) { 2288 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 2289 } 2290 2291 /* read in every ones (except the last) and ship off */ 2292 for (i=1; i<size-1; i++) { 2293 nz = procsnz[i]; 2294 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2295 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2296 } 2297 /* read in the stuff for the last proc */ 2298 if (size != 1) { 2299 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2300 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2301 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2302 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2303 } 2304 ierr = PetscFree(cols);CHKERRQ(ierr); 2305 } else { /* procs[i], i>0 */ 2306 /* determine buffer space needed for message */ 2307 nz = 0; 2308 for (i=0; i<m; i++) nz += locrowlens[i]; 2309 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2310 mycols = ibuf; 2311 /* receive message of column indices*/ 2312 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2313 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2314 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2315 } 2316 2317 /* loop over local rows, determining number of off diagonal entries */ 2318 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 2319 ierr = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 2320 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2321 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2322 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2323 rowcount = 0; 2324 nzcount = 0; 2325 for (i=0; i<mbs; i++) { 2326 dcount = 0; 2327 odcount = 0; 2328 for (j=0; j<bs; j++) { 2329 kmax = locrowlens[rowcount]; 2330 for (k=0; k<kmax; k++) { 2331 tmp = mycols[nzcount++]/bs; /* block col. index */ 2332 if (!mask[tmp]) { 2333 mask[tmp] = 1; 2334 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2335 else masked1[dcount++] = tmp; /* entry in diag portion */ 2336 } 2337 } 2338 rowcount++; 2339 } 2340 2341 dlens[i] = dcount; /* d_nzz[i] */ 2342 odlens[i] = odcount; /* o_nzz[i] */ 2343 2344 /* zero out the mask elements we set */ 2345 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2346 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2347 } 2348 if (!sizesset) { 2349 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2350 } 2351 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2352 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2353 2354 if (!rank) { 2355 ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 2356 /* read in my part of the matrix numerical values */ 2357 nz = procsnz[0]; 2358 vals = buf; 2359 mycols = ibuf; 2360 if (size == 1) nz -= extra_rows; 2361 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2362 if (size == 1) { 2363 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 2364 } 2365 2366 /* insert into matrix */ 2367 jj = rstart*bs; 2368 for (i=0; i<m; i++) { 2369 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2370 mycols += locrowlens[i]; 2371 vals += locrowlens[i]; 2372 jj++; 2373 } 2374 2375 /* read in other processors (except the last one) and ship out */ 2376 for (i=1; i<size-1; i++) { 2377 nz = procsnz[i]; 2378 vals = buf; 2379 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2380 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2381 } 2382 /* the last proc */ 2383 if (size != 1) { 2384 nz = procsnz[i] - extra_rows; 2385 vals = buf; 2386 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2387 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2388 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2389 } 2390 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2391 2392 } else { 2393 /* receive numeric values */ 2394 ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 2395 2396 /* receive message of values*/ 2397 vals = buf; 2398 mycols = ibuf; 2399 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2400 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2401 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2402 2403 /* insert into matrix */ 2404 jj = rstart*bs; 2405 for (i=0; i<m; i++) { 2406 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2407 mycols += locrowlens[i]; 2408 vals += locrowlens[i]; 2409 jj++; 2410 } 2411 } 2412 2413 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2414 ierr = PetscFree(buf);CHKERRQ(ierr); 2415 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2416 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2417 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2418 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2419 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2420 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2421 PetscFunctionReturn(0); 2422 } 2423 2424 #undef __FUNCT__ 2425 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2426 /*XXXXX@ 2427 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2428 2429 Input Parameters: 2430 . mat - the matrix 2431 . fact - factor 2432 2433 Not Collective on Mat, each process can have a different hash factor 2434 2435 Level: advanced 2436 2437 Notes: 2438 This can also be set by the command line option: -mat_use_hash_table fact 2439 2440 .keywords: matrix, hashtable, factor, HT 2441 2442 .seealso: MatSetOption() 2443 @XXXXX*/ 2444 2445 2446 #undef __FUNCT__ 2447 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2448 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2449 { 2450 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2451 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2452 PetscReal atmp; 2453 PetscReal *work,*svalues,*rvalues; 2454 PetscErrorCode ierr; 2455 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2456 PetscMPIInt rank,size; 2457 PetscInt *rowners_bs,dest,count,source; 2458 PetscScalar *va; 2459 MatScalar *ba; 2460 MPI_Status stat; 2461 2462 PetscFunctionBegin; 2463 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2464 ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 2465 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2466 2467 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2468 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2469 2470 bs = A->rmap->bs; 2471 mbs = a->mbs; 2472 Mbs = a->Mbs; 2473 ba = b->a; 2474 bi = b->i; 2475 bj = b->j; 2476 2477 /* find ownerships */ 2478 rowners_bs = A->rmap->range; 2479 2480 /* each proc creates an array to be distributed */ 2481 ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr); 2482 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2483 2484 /* row_max for B */ 2485 if (rank != size-1) { 2486 for (i=0; i<mbs; i++) { 2487 ncols = bi[1] - bi[0]; bi++; 2488 brow = bs*i; 2489 for (j=0; j<ncols; j++) { 2490 bcol = bs*(*bj); 2491 for (kcol=0; kcol<bs; kcol++) { 2492 col = bcol + kcol; /* local col index */ 2493 col += rowners_bs[rank+1]; /* global col index */ 2494 for (krow=0; krow<bs; krow++) { 2495 atmp = PetscAbsScalar(*ba); ba++; 2496 row = brow + krow; /* local row index */ 2497 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2498 if (work[col] < atmp) work[col] = atmp; 2499 } 2500 } 2501 bj++; 2502 } 2503 } 2504 2505 /* send values to its owners */ 2506 for (dest=rank+1; dest<size; dest++) { 2507 svalues = work + rowners_bs[dest]; 2508 count = rowners_bs[dest+1]-rowners_bs[dest]; 2509 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2510 } 2511 } 2512 2513 /* receive values */ 2514 if (rank) { 2515 rvalues = work; 2516 count = rowners_bs[rank+1]-rowners_bs[rank]; 2517 for (source=0; source<rank; source++) { 2518 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 2519 /* process values */ 2520 for (i=0; i<count; i++) { 2521 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2522 } 2523 } 2524 } 2525 2526 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2527 ierr = PetscFree(work);CHKERRQ(ierr); 2528 PetscFunctionReturn(0); 2529 } 2530 2531 #undef __FUNCT__ 2532 #define __FUNCT__ "MatSOR_MPISBAIJ" 2533 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2534 { 2535 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2536 PetscErrorCode ierr; 2537 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2538 PetscScalar *x,*ptr,*from; 2539 Vec bb1; 2540 const PetscScalar *b; 2541 2542 PetscFunctionBegin; 2543 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2544 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2545 2546 if (flag == SOR_APPLY_UPPER) { 2547 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2548 PetscFunctionReturn(0); 2549 } 2550 2551 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2552 if (flag & SOR_ZERO_INITIAL_GUESS) { 2553 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2554 its--; 2555 } 2556 2557 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2558 while (its--) { 2559 2560 /* lower triangular part: slvec0b = - B^T*xx */ 2561 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2562 2563 /* copy xx into slvec0a */ 2564 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2565 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2566 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2567 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2568 2569 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2570 2571 /* copy bb into slvec1a */ 2572 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2573 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2574 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2575 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2576 2577 /* set slvec1b = 0 */ 2578 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2579 2580 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2581 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2582 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2583 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2584 2585 /* upper triangular part: bb1 = bb1 - B*x */ 2586 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2587 2588 /* local diagonal sweep */ 2589 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2590 } 2591 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2592 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2593 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2594 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2595 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2596 } else if (flag & SOR_EISENSTAT) { 2597 Vec xx1; 2598 PetscBool hasop; 2599 const PetscScalar *diag; 2600 PetscScalar *sl,scale = (omega - 2.0)/omega; 2601 PetscInt i,n; 2602 2603 if (!mat->xx1) { 2604 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2605 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2606 } 2607 xx1 = mat->xx1; 2608 bb1 = mat->bb1; 2609 2610 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2611 2612 if (!mat->diag) { 2613 /* this is wrong for same matrix with new nonzero values */ 2614 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 2615 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2616 } 2617 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2618 2619 if (hasop) { 2620 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2621 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2622 } else { 2623 /* 2624 These two lines are replaced by code that may be a bit faster for a good compiler 2625 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2626 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2627 */ 2628 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2629 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2630 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2631 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2632 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2633 if (omega == 1.0) { 2634 for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 2635 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2636 } else { 2637 for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 2638 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2639 } 2640 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2641 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2642 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2643 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2644 } 2645 2646 /* multiply off-diagonal portion of matrix */ 2647 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2648 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2649 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2650 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2651 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2652 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2653 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2654 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2655 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2656 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2657 2658 /* local sweep */ 2659 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2660 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2661 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2662 PetscFunctionReturn(0); 2663 } 2664 2665 #undef __FUNCT__ 2666 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2667 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2668 { 2669 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2670 PetscErrorCode ierr; 2671 Vec lvec1,bb1; 2672 2673 PetscFunctionBegin; 2674 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2675 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2676 2677 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2678 if (flag & SOR_ZERO_INITIAL_GUESS) { 2679 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2680 its--; 2681 } 2682 2683 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2684 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2685 while (its--) { 2686 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2687 2688 /* lower diagonal part: bb1 = bb - B^T*xx */ 2689 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2690 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2691 2692 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2693 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2694 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2695 2696 /* upper diagonal part: bb1 = bb1 - B*x */ 2697 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2698 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2699 2700 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2701 2702 /* diagonal sweep */ 2703 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2704 } 2705 ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 2706 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2707 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2708 PetscFunctionReturn(0); 2709 } 2710 2711 #undef __FUNCT__ 2712 #define __FUNCT__ "MatCreateMPISBAIJWithArrays" 2713 /*@ 2714 MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard 2715 CSR format the local rows. 2716 2717 Collective on MPI_Comm 2718 2719 Input Parameters: 2720 + comm - MPI communicator 2721 . bs - the block size, only a block size of 1 is supported 2722 . m - number of local rows (Cannot be PETSC_DECIDE) 2723 . n - This value should be the same as the local size used in creating the 2724 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2725 calculated if N is given) For square matrices n is almost always m. 2726 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2727 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2728 . i - row indices 2729 . j - column indices 2730 - a - matrix values 2731 2732 Output Parameter: 2733 . mat - the matrix 2734 2735 Level: intermediate 2736 2737 Notes: 2738 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2739 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2740 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2741 2742 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 2743 2744 .keywords: matrix, aij, compressed row, sparse, parallel 2745 2746 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2747 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 2748 @*/ 2749 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2750 { 2751 PetscErrorCode ierr; 2752 2753 2754 PetscFunctionBegin; 2755 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2756 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2757 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2758 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 2759 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 2760 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 2761 PetscFunctionReturn(0); 2762 } 2763 2764 2765 #undef __FUNCT__ 2766 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR" 2767 /*@C 2768 MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2769 (the default parallel PETSc format). 2770 2771 Collective on MPI_Comm 2772 2773 Input Parameters: 2774 + B - the matrix 2775 . bs - the block size 2776 . i - the indices into j for the start of each local row (starts with zero) 2777 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2778 - v - optional values in the matrix 2779 2780 Level: developer 2781 2782 .keywords: matrix, aij, compressed row, sparse, parallel 2783 2784 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 2785 @*/ 2786 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2787 { 2788 PetscErrorCode ierr; 2789 2790 PetscFunctionBegin; 2791 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2792 PetscFunctionReturn(0); 2793 } 2794 2795 2796