#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
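/*
   The two macros below insert or add a single scalar value into the
   "diagonal" (A, SeqSBAIJ) or "off-diagonal" (B, SeqBAIJ) local block.
   They locate the target block column with a short binary search followed
   by a linear scan; if the block is not present and new nonzeros are
   allowed, the later entries of the block row are shifted up (reallocating
   first if the row is full) to make room for it.
*/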
#define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    A->nonzerostate++;\
    a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    B->nonzerostate++;\
    b_noinsert:; \
    bilen[brow] = nrow; \
  }

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else { /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else { /* off processor entry */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
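/*
   A minimal caller-side sketch (illustrative, not part of this file's API):
   for the SBAIJ format only the upper triangle needs to be supplied, so a
   typical assembly loop over a locally owned row i sets entries with column
   index j >= i only.

     for (j = i; j < ncols; j++) {
       ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
     }

   Here ncols and v are placeholders for the caller's data.
*/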
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     = baij->barray;
  PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
%D",im[i],baij->Mbs-1); 263 #endif 264 if (im[i] >= rstart && im[i] < rend) { 265 row = im[i] - rstart; 266 for (j=0; j<n; j++) { 267 if (im[i] > in[j]) { 268 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 269 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 270 } 271 /* If NumCol = 1 then a copy is not required */ 272 if ((roworiented) && (n == 1)) { 273 barray = (MatScalar*) v + i*bs2; 274 } else if ((!roworiented) && (m == 1)) { 275 barray = (MatScalar*) v + j*bs2; 276 } else { /* Here a copy is required */ 277 if (roworiented) { 278 value = v + i*(stepval+bs)*bs + j*bs; 279 } else { 280 value = v + j*(stepval+bs)*bs + i*bs; 281 } 282 for (ii=0; ii<bs; ii++,value+=stepval) { 283 for (jj=0; jj<bs; jj++) { 284 *barray++ = *value++; 285 } 286 } 287 barray -=bs2; 288 } 289 290 if (in[j] >= cstart && in[j] < cend) { 291 col = in[j] - cstart; 292 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 293 } else if (in[j] < 0) continue; 294 #if defined(PETSC_USE_DEBUG) 295 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 296 #endif 297 else { 298 if (mat->was_assembled) { 299 if (!baij->colmap) { 300 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 301 } 302 303 #if defined(PETSC_USE_DEBUG) 304 #if defined(PETSC_USE_CTABLE) 305 { PetscInt data; 306 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 307 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 308 } 309 #else 310 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 311 #endif 312 #endif 313 #if defined(PETSC_USE_CTABLE) 314 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 315 col = (col - 1)/bs; 316 #else 317 col = (baij->colmap[in[j]] - 1)/bs; 318 #endif 319 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 320 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 321 col = in[j]; 322 } 323 } else col = in[j]; 324 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 325 } 326 } 327 } else { 328 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 329 if (!baij->donotstash) { 330 if (roworiented) { 331 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 332 } else { 333 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 334 } 335 } 336 } 337 } 338 PetscFunctionReturn(0); 339 } 340 341 #undef __FUNCT__ 342 #define __FUNCT__ "MatGetValues_MPISBAIJ" 343 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 344 { 345 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 346 PetscErrorCode ierr; 347 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 348 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 349 350 PetscFunctionBegin; 351 for (i=0; i<m; i++) { 352 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 353 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
      ierr    = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* square of norm(A) */
      ierr    = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* square of norm(B) */
      ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
      ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
      ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
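/*
   MatAssemblyEnd drains both stashes: the scalar stash is replayed through
   MatSetValues_MPISBAIJ() and the block stash through
   MatSetValuesBlocked_MPISBAIJ() (with the row-oriented flags temporarily
   cleared, since the stashed blocks are column oriented), after which the
   local A and B parts are assembled.
*/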
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the row-oriented flags to PETSC_FALSE, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer);
extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);

#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
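/*
   The MatMult routines below exploit the symmetric storage: slvec0 holds
   [x; B^T*x] and a single scatter (sMvctx, ADD_VALUES) both accumulates the
   off-process B^T*x contributions into slvec1a (which already holds the
   diagonal-block product) and gathers the ghost values of x into slvec1b;
   the result is then y = slvec1a + B*slvec1b.
*/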
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
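/*
   MatGetRow below merges a row of the diagonal block A with a row of the
   off-diagonal block B into a single array sorted by global column index:
   the B entries whose global columns lie before the diagonal block come
   first, then the A entries, then the remaining B entries.
*/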
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart; /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPISBAIJ"
PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A  = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);

    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* the matrix is symmetric, so its transpose is a copy of itself; note the
     test is on reuse (the original tested the bare enum constant
     MAT_INITIAL_MATRIX, which is a no-op) */
  if (reuse == MAT_INITIAL_MATRIX || *B != A) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a     = baij->A, b = baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscBool      flg;

  PetscFunctionBegin;
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same\n");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv != n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
"MatSetUnfactored_MPISBAIJ" 1298 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1299 { 1300 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1301 PetscErrorCode ierr; 1302 1303 PetscFunctionBegin; 1304 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1305 PetscFunctionReturn(0); 1306 } 1307 1308 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1309 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "MatEqual_MPISBAIJ" 1312 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1313 { 1314 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1315 Mat a,b,c,d; 1316 PetscBool flg; 1317 PetscErrorCode ierr; 1318 1319 PetscFunctionBegin; 1320 a = matA->A; b = matA->B; 1321 c = matB->A; d = matB->B; 1322 1323 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1324 if (flg) { 1325 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1326 } 1327 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1328 PetscFunctionReturn(0); 1329 } 1330 1331 #undef __FUNCT__ 1332 #define __FUNCT__ "MatCopy_MPISBAIJ" 1333 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1334 { 1335 PetscErrorCode ierr; 1336 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1337 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1338 1339 PetscFunctionBegin; 1340 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1341 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1342 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1343 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1344 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1345 } else { 1346 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1347 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1348 } 1349 PetscFunctionReturn(0); 1350 } 1351 1352 #undef __FUNCT__ 1353 #define __FUNCT__ "MatSetUp_MPISBAIJ" 1354 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1355 { 1356 PetscErrorCode ierr; 1357 1358 PetscFunctionBegin; 1359 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1360 PetscFunctionReturn(0); 1361 } 1362 1363 #undef __FUNCT__ 1364 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1365 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1366 { 1367 PetscErrorCode ierr; 1368 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1369 PetscBLASInt bnz,one=1; 1370 Mat_SeqSBAIJ *xa,*ya; 1371 Mat_SeqBAIJ *xb,*yb; 1372 1373 PetscFunctionBegin; 1374 if (str == SAME_NONZERO_PATTERN) { 1375 PetscScalar alpha = a; 1376 xa = (Mat_SeqSBAIJ*)xx->A->data; 1377 ya = (Mat_SeqSBAIJ*)yy->A->data; 1378 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1379 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1380 xb = (Mat_SeqBAIJ*)xx->B->data; 1381 yb = (Mat_SeqBAIJ*)yy->B->data; 1382 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1383 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1384 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1385 } else { 1386 Mat B; 1387 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1388 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1389 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1390 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 1391 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1392 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1393 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1394 ierr = 
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPISBAIJ"
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqSBAIJ   *xa,*ya;
  Mat_SeqBAIJ    *xb,*yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    xa   = (Mat_SeqSBAIJ*)xx->A->data;
    ya   = (Mat_SeqSBAIJ*)yy->A->data;
    ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
    xb   = (Mat_SeqBAIJ*)xx->B->data;
    yb   = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
    ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
{
  PetscErrorCode ierr;
  PetscInt       i;
  PetscBool      flg;

  PetscFunctionBegin;
  for (i=0; i<n; i++) {
    ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
  }
  ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
                                       MatGetRow_MPISBAIJ,
                                       MatRestoreRow_MPISBAIJ,
                                       MatMult_MPISBAIJ,
                               /* 4*/  MatMultAdd_MPISBAIJ,
                                       MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
                                       MatMultAdd_MPISBAIJ,
                                       0, 0, 0,
                               /* 10*/ 0, 0, 0,
                                       MatSOR_MPISBAIJ,
                                       MatTranspose_MPISBAIJ,
                               /* 15*/ MatGetInfo_MPISBAIJ,
                                       MatEqual_MPISBAIJ,
                                       MatGetDiagonal_MPISBAIJ,
                                       MatDiagonalScale_MPISBAIJ,
                                       MatNorm_MPISBAIJ,
                               /* 20*/ MatAssemblyBegin_MPISBAIJ,
                                       MatAssemblyEnd_MPISBAIJ,
                                       MatSetOption_MPISBAIJ,
                                       MatZeroEntries_MPISBAIJ,
                               /* 24*/ 0, 0, 0, 0, 0,
                               /* 29*/ MatSetUp_MPISBAIJ,
                                       0, 0, 0, 0,
                               /* 34*/ MatDuplicate_MPISBAIJ,
                                       0, 0, 0, 0,
                               /* 39*/ MatAXPY_MPISBAIJ,
                                       MatGetSubMatrices_MPISBAIJ,
                                       MatIncreaseOverlap_MPISBAIJ,
                                       MatGetValues_MPISBAIJ,
                                       MatCopy_MPISBAIJ,
                               /* 44*/ 0,
                                       MatScale_MPISBAIJ,
                                       0, 0, 0,
                               /* 49*/ 0, 0, 0, 0, 0,
                               /* 54*/ 0, 0,
                                       MatSetUnfactored_MPISBAIJ,
                                       0,
                                       MatSetValuesBlocked_MPISBAIJ,
                               /* 59*/ 0, 0, 0, 0, 0,
                               /* 64*/ 0, 0, 0, 0, 0,
                               /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
                                       0, 0, 0, 0,
                               /* 74*/ 0, 0, 0, 0, 0,
                               /* 79*/ 0, 0, 0, 0,
                                       MatLoad_MPISBAIJ,
                               /* 84*/ 0, 0, 0, 0, 0,
                               /* 89*/ 0, 0, 0, 0, 0,
                               /* 94*/ 0, 0, 0, 0, 0,
                               /* 99*/ 0, 0, 0, 0, 0,
                               /*104*/ 0,
                                       MatRealPart_MPISBAIJ,
                                       MatImaginaryPart_MPISBAIJ,
                                       MatGetRowUpperTriangular_MPISBAIJ,
                                       MatRestoreRowUpperTriangular_MPISBAIJ,
                               /*109*/ 0, 0, 0, 0, 0,
                               /*114*/ 0, 0, 0, 0, 0,
                               /*119*/ 0, 0, 0, 0, 0,
                               /*124*/ 0, 0, 0, 0, 0,
                               /*129*/ 0, 0, 0, 0, 0,
                               /*134*/ 0, 0, 0, 0, 0,
                               /*139*/ 0, 0, 0
};

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPISBAIJ*)A->data)->A;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscInt       i,mbs,Mbs;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);

  b   = (Mat_MPISBAIJ*)B->data;
  mbs = B->rmap->n/bs;
  Mbs = B->rmap->N/bs;
  if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);

  B->rmap->bs = bs;
  b->bs2      = bs*bs;
  b->mbs      = mbs;
  b->nbs      = mbs;
  b->Mbs      = Mbs;
  b->Nbs      = Mbs;

  for (i=0; i<=b->size; i++) {
    b->rangebs[i] = B->rmap->range[i]/bs;
  }
  b->rstartbs = B->rmap->rstart/bs;
  b->rendbs   = B->rmap->rend/bs;

  b->cstartbs = B->cmap->rstart/bs;
  b->cendbs   = B->cmap->rend/bs;

  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
    ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
  }

  ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);

  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
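/*
   A minimal user-level sketch (illustrative only): create and preallocate an
   MPISBAIJ matrix with block size bs, d_nz upper triangular blocks per block
   row in the diagonal part and o_nz blocks per block row in the off-diagonal
   part.

     Mat mat;
     ierr = MatCreate(PETSC_COMM_WORLD,&mat);CHKERRQ(ierr);
     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocation(mat,bs,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);

   Here m, bs, d_nz and o_nz are placeholders for the caller's sizes.
*/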
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    nz_max = PetscMax(nz_max,nz);
    JJ     = jj + ii[i];
    for (j=0; j<nz; j++) {
      if (*JJ >= cstart) break;
      JJ++;
    }
    d = 0;
    for (; j<nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d;
  }
  ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  values = (PetscScalar*)V;
  if (!values) {
    ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row    = i + rstart;
    PetscInt          ncols  = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
    ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_MUMPS)
PETSC_EXTERN PetscErrorCode MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
PETSC_EXTERN PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
#endif

/*MC
   MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
   based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
   the matrix is stored.

   For complex numbers this matrix is by default symmetric, NOT Hermitian symmetric.  To make it
   Hermitian symmetric you can call MatSetOption(mat,MAT_HERMITIAN,PETSC_TRUE).

   Options Database Keys:
. -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
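
   Example:
   A minimal creation sketch; d_nz and o_nz here are illustrative per-block-row
   estimates, not values prescribed by this type:
.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,m,M,M);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
   ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
.ve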

   Level: beginner

.seealso: MatCreateSBAIJ()
M*/

PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);

#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPISBAIJ"
PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->assembled    = PETSC_FALSE;
  B->insertmode   = NOT_SET_VALUES;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc1((b->size+2),&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec    = 0;
  b->Mvctx   = 0;
  b->slvec0  = 0;
  b->slvec0b = 0;
  b->slvec1  = 0;
  b->slvec1a = 0;
  b->slvec1b = 0;
  b->sMvctx  = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  b->in_loc = 0;
  b->v_loc  = 0;
  b->n_loc  = 0;

  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);

  B->symmetric                  = PETSC_TRUE;
  B->structurally_symmetric     = PETSC_TRUE;
  B->symmetric_set              = PETSC_TRUE;
  B->structurally_symmetric_set = PETSC_TRUE;

  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.

   This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
   and MATMPISBAIJ otherwise.

   Options Database Keys:
. -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
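
   Example:
   A minimal sketch that lets the communicator size select the concrete type; calling
   both preallocation routines is safe because only the one matching the actual type
   takes effect (d_nz and o_nz are illustrative estimates):
.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,m,M,M);CHKERRQ(ierr);
   ierr = MatSetType(A,MATSBAIJ);CHKERRQ(ierr);
   ierr = MatSeqSBAIJSetPreallocation(A,bs,d_nz,NULL);CHKERRQ(ierr);
   ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
.ve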

   Level: beginner

.seealso: MatCreateSBAIJ(), MATSEQSBAIJ, MATMPISBAIJ
M*/

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation"
/*@C
   MatMPISBAIJSetPreallocation - For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  B - the matrix
.  bs - size of block
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local
           submatrix (possibly different for each block row) or NULL.  If you plan to
           factor the matrix you must leave room for the diagonal entry and set a value
           even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix that is right of the diagonal
           (possibly different for each block row) or NULL.

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   You can call MatGetInfo() to get information on how effective the preallocation was,
   for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |. . . d d d o o o o  o  o
   row 4  |. . . d d d o o o o  o  o
   row 5  |. . . d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d matrix is stored in
   MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
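
   For instance, if in the figure above every depicted d and o location holds a
   nonzero and bs = 1, the tightest preallocation for these three rows would be
.vb
   d_nnz = {3, 2, 1}    upper triangle of the d block, diagonal included
   o_nnz = {6, 6, 6}    all six o columns in each row
.ve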

   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateSBAIJ"
/*@C
   MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the diagonal portion of the local
           submatrix (possibly different for each block row) or NULL.
           If you plan to factor the matrix you must leave room for the diagonal entry and
           set its value even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
.   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqBAIJ format on one processor)

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   The number of rows and columns must be divisible by blocksize.
   This matrix type does not support complex Hermitian operations.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |. . . d d d o o o o  o  o
   row 4  |. . . d d d o o o o  o  o
   row 5  |. . . d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d matrix is stored in
   MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
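
   Example:
   A minimal sketch creating a square matrix with rough per-block-row estimates
   d_nz and o_nz (illustrative values, not prescribed by this routine):
.vb
   Mat A;
   ierr = MatCreateSBAIJ(comm,bs,m,m,M,M,d_nz,NULL,o_nz,NULL,&A);CHKERRQ(ierr);
.ve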

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
@*/

PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
    ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPISBAIJ"
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
  PetscScalar    *array;

  PetscFunctionBegin;
  *newmat = 0;

  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a      = (Mat_MPISBAIJ*)mat->data;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);

  ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);

  /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
  a->sMvctx = oldmat->sMvctx;
  ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);

  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPISBAIJ"
PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
  PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
  PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
  PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
  int            fd;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  if (bs < 0) bs = 1;

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
  }

  if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1];
  N    = header[2];

  /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
  if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;

  /* If global sizes are set, check that they are consistent with those given in the file */
  if (sizesset) {
    ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);

  if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);
  if (extra_rows == bs) extra_rows = 0;
  else Mbs++;
  if (extra_rows && !rank) {
    ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
    mbs = Mbs/size + ((Mbs % size) > rank);
    m   = mbs*bs;
  } else { /* user set */
    m   = newmat->rmap->n;
    mbs = m/bs;
  }
  ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
  ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
  ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);

  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;

  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1((M+extra_rows),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
    ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {   /* procs[0] */
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
    }

    /* read in the column indices of all the other processors (except the last) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {  /* procs[i], i>0 */
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) nz += locrowlens[i];
    ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
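  /* mask marks the block columns already seen in the current block row, while
     masked1/masked2 record which diagonal / off-diagonal block columns were
     marked so the marks can be cleared cheaply after each block row. */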
  ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
  ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  rowcount = 0;
  nzcount  = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs; /* block col. index */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
          else masked1[dcount++] = tmp; /* entry in diag portion */
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;  /* d_nzz[i] */
    odlens[i] = odcount; /* o_nzz[i] */

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }
  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  }
  ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
  ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
    }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }

    /* read in the values of all the other processors (except the last one) and ship them out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }

  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
/*XXXXX@
   MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Input Parameters:
.  mat - the matrix
.  fact - factor

   Not Collective on Mat, each process can have a different hash factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@XXXXX*/

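/* Only the upper triangle is stored, so an entry of the off-diagonal block B in
   global column c also represents entry (c,row) of the matrix.  Each process
   therefore accumulates candidate row maxima for rows owned by higher-ranked
   processes in the work array and ships those segments to their owners below. */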
#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
  ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  bs  = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed */
  ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);

  /* row_max for B */
  if (rank != size-1) {
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col  = bcol + kcol;        /* local col index */
          col += rowners_bs[rank+1]; /* global col index */
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;      /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to their owners */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1]-rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    }
  }

  /* receive values */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank+1]-rowners_bs[rank];
    for (source=0; source<rank; source++) {
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
      /* process values */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ"
PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
  PetscScalar       *x,*ptr,*from;
  Vec               bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
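    /* Each iteration builds bb1 = bb - B*x_ghost - (B^T*x scattered back to the
       owning rows), i.e. the right-hand side with both the upper and the lower
       off-diagonal contributions removed, and then runs the purely local
       symmetric sweep on the diagonal block A. */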
    while (its--) {

      /* lower triangular part: slvec0b = - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);

      ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if (flag & SOR_EISENSTAT) {
    Vec               xx1;
    PetscBool         hasop;
    const PetscScalar *diag;
    PetscScalar       *sl,scale = (omega - 2.0)/omega;
    PetscInt          i,n;

    if (!mat->xx1) {
      ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
      ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);

    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
    } else {
      /*
        These two lines are replaced by code that may be a bit faster for a good compiler
        ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
        ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
      */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
      if (omega == 1.0) {
        for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply off-diagonal portion of matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower triangular part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJWithArrays"
/*@
   MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine.  Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the i indices are indices corresponding to the local j array.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
/*@C
   MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
   (block compressed row).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix
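
   Example:
   A minimal sketch with bs = 1, assuming i, j and v hold the CSR description of
   the locally owned rows (upper triangular part included):
.vb
   ierr = MatCreate(comm,&B);CHKERRQ(ierr);
   ierr = MatSetSizes(B,m,m,M,M);CHKERRQ(ierr);
   ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
   ierr = MatMPISBAIJSetPreallocationCSR(B,1,i,j,v);CHKERRQ(ierr);
.ve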

   Level: developer

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
@*/
PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}