1a30f8f8cSSatish Balay 2c6db04a5SJed Brown #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3c6db04a5SJed Brown #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h> 5c6db04a5SJed Brown #include <petscblaslapack.h> 6a30f8f8cSSatish Balay 76214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 8cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 96214f412SHong Zhang #endif 10b147fbf3SStefano Zampini 11b147fbf3SStefano Zampini /* This could be moved to matimpl.h */ 12b147fbf3SStefano Zampini static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill) 13b147fbf3SStefano Zampini { 14b147fbf3SStefano Zampini Mat preallocator; 15b147fbf3SStefano Zampini PetscInt r,rstart,rend; 16b147fbf3SStefano Zampini PetscInt bs,i,m,n,M,N; 17b147fbf3SStefano Zampini PetscBool cong = PETSC_TRUE; 18b147fbf3SStefano Zampini PetscErrorCode ierr; 19b147fbf3SStefano Zampini 20b147fbf3SStefano Zampini PetscFunctionBegin; 21b147fbf3SStefano Zampini PetscValidHeaderSpecific(B,MAT_CLASSID,1); 22b147fbf3SStefano Zampini PetscValidLogicalCollectiveInt(B,nm,2); 23b147fbf3SStefano Zampini for (i = 0; i < nm; i++) { 24b147fbf3SStefano Zampini PetscValidHeaderSpecific(X[i],MAT_CLASSID,3); 25b147fbf3SStefano Zampini ierr = PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);CHKERRQ(ierr); 26b147fbf3SStefano Zampini if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts"); 27b147fbf3SStefano Zampini } 28b147fbf3SStefano Zampini PetscValidLogicalCollectiveBool(B,fill,5); 29b147fbf3SStefano Zampini ierr = MatGetBlockSize(B,&bs);CHKERRQ(ierr); 30b147fbf3SStefano Zampini ierr = MatGetSize(B,&M,&N);CHKERRQ(ierr); 31b147fbf3SStefano Zampini ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr); 32b147fbf3SStefano Zampini ierr = MatCreate(PetscObjectComm((PetscObject)B),&preallocator);CHKERRQ(ierr); 33b147fbf3SStefano Zampini ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr); 34b147fbf3SStefano Zampini ierr = MatSetBlockSize(preallocator,bs);CHKERRQ(ierr); 35b147fbf3SStefano Zampini ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr); 36b147fbf3SStefano Zampini ierr = MatSetUp(preallocator);CHKERRQ(ierr); 37b147fbf3SStefano Zampini ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr); 38b147fbf3SStefano Zampini for (r = rstart; r < rend; ++r) { 39b147fbf3SStefano Zampini PetscInt ncols; 40b147fbf3SStefano Zampini const PetscInt *row; 41b147fbf3SStefano Zampini const PetscScalar *vals; 42b147fbf3SStefano Zampini 43b147fbf3SStefano Zampini for (i = 0; i < nm; i++) { 44b147fbf3SStefano Zampini ierr = MatGetRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr); 45b147fbf3SStefano Zampini ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr); 46b147fbf3SStefano Zampini if (symm && symm[i]) { 47b147fbf3SStefano Zampini ierr = MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr); 48b147fbf3SStefano Zampini } 49b147fbf3SStefano Zampini ierr = MatRestoreRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr); 50b147fbf3SStefano Zampini } 51b147fbf3SStefano Zampini } 52b147fbf3SStefano Zampini ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 53b147fbf3SStefano Zampini ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 54b147fbf3SStefano Zampini ierr = MatPreallocatorPreallocate(preallocator,fill,B);CHKERRQ(ierr); 55b147fbf3SStefano Zampini ierr = MatDestroy(&preallocator);CHKERRQ(ierr); 56b147fbf3SStefano Zampini PetscFunctionReturn(0); 57b147fbf3SStefano Zampini } 58b147fbf3SStefano Zampini 59b147fbf3SStefano Zampini static PetscErrorCode MatConvert_MPISBAIJ_XAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 60b147fbf3SStefano Zampini { 61b147fbf3SStefano Zampini Mat B; 62b147fbf3SStefano Zampini PetscErrorCode ierr; 63b147fbf3SStefano Zampini PetscInt r; 64b147fbf3SStefano Zampini 65b147fbf3SStefano Zampini PetscFunctionBegin; 66b147fbf3SStefano Zampini if (reuse != MAT_REUSE_MATRIX) { 67b147fbf3SStefano Zampini PetscBool symm = PETSC_TRUE; 68b147fbf3SStefano Zampini PetscInt bs; 69b147fbf3SStefano Zampini 70b147fbf3SStefano Zampini ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 71b147fbf3SStefano Zampini ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 72b147fbf3SStefano Zampini ierr = MatSetType(B,newtype);CHKERRQ(ierr); 73b147fbf3SStefano Zampini ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 74b147fbf3SStefano Zampini ierr = MatSetBlockSize(B,bs);CHKERRQ(ierr); 75b147fbf3SStefano Zampini ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 76b147fbf3SStefano Zampini ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 77b147fbf3SStefano Zampini ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 78b147fbf3SStefano Zampini ierr = MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);CHKERRQ(ierr); 79b147fbf3SStefano Zampini ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 80b147fbf3SStefano Zampini } else B = *newmat; 81b147fbf3SStefano Zampini 82b147fbf3SStefano Zampini ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 83b147fbf3SStefano Zampini for (r = A->rmap->rstart; r < A->rmap->rend; r++) { 84b147fbf3SStefano Zampini PetscInt ncols; 85b147fbf3SStefano Zampini const PetscInt *row; 86b147fbf3SStefano Zampini const PetscScalar *vals; 87b147fbf3SStefano Zampini 88b147fbf3SStefano Zampini ierr = MatGetRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr); 89b147fbf3SStefano Zampini ierr = MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr); 90b147fbf3SStefano Zampini ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr); 91b147fbf3SStefano Zampini ierr = MatRestoreRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr); 92b147fbf3SStefano Zampini } 93b147fbf3SStefano Zampini ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 94b147fbf3SStefano Zampini ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 95b147fbf3SStefano Zampini ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 96b147fbf3SStefano Zampini 97b147fbf3SStefano Zampini if (reuse == MAT_INPLACE_MATRIX) { 98b147fbf3SStefano Zampini ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 99b147fbf3SStefano Zampini } else { 100b147fbf3SStefano Zampini *newmat = B; 101b147fbf3SStefano Zampini } 102b147fbf3SStefano Zampini PetscFunctionReturn(0); 103b147fbf3SStefano Zampini } 104b147fbf3SStefano Zampini 1057087cfbeSBarry Smith PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 106a30f8f8cSSatish Balay { 107f3566a2aSHong Zhang Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 108dfbe8321SBarry Smith PetscErrorCode ierr; 109a30f8f8cSSatish Balay 110a30f8f8cSSatish Balay PetscFunctionBegin; 111a30f8f8cSSatish Balay ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 112a30f8f8cSSatish Balay ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 113a30f8f8cSSatish Balay PetscFunctionReturn(0); 114a30f8f8cSSatish Balay } 115a30f8f8cSSatish Balay 1167087cfbeSBarry Smith PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 117a30f8f8cSSatish Balay { 118f3566a2aSHong Zhang Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 119dfbe8321SBarry Smith PetscErrorCode ierr; 120a30f8f8cSSatish Balay 121a30f8f8cSSatish Balay PetscFunctionBegin; 122a30f8f8cSSatish Balay ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 123a30f8f8cSSatish Balay ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 124a30f8f8cSSatish Balay PetscFunctionReturn(0); 125a30f8f8cSSatish Balay } 126a30f8f8cSSatish Balay 127d40312a9SBarry Smith #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 128a30f8f8cSSatish Balay { \ 129a30f8f8cSSatish Balay \ 130a30f8f8cSSatish Balay brow = row/bs; \ 131a30f8f8cSSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 132a30f8f8cSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 133a30f8f8cSSatish Balay bcol = col/bs; \ 134a30f8f8cSSatish Balay ridx = row % bs; cidx = col % bs; \ 135a30f8f8cSSatish Balay low = 0; high = nrow; \ 136a30f8f8cSSatish Balay while (high-low > 3) { \ 137a30f8f8cSSatish Balay t = (low+high)/2; \ 138a30f8f8cSSatish Balay if (rp[t] > bcol) high = t; \ 139a30f8f8cSSatish Balay else low = t; \ 140a30f8f8cSSatish Balay } \ 141a30f8f8cSSatish Balay for (_i=low; _i<high; _i++) { \ 142a30f8f8cSSatish Balay if (rp[_i] > bcol) break; \ 143a30f8f8cSSatish Balay if (rp[_i] == bcol) { \ 144a30f8f8cSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 145a30f8f8cSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 146a30f8f8cSSatish Balay else *bap = value; \ 147a30f8f8cSSatish Balay goto a_noinsert; \ 148a30f8f8cSSatish Balay } \ 149a30f8f8cSSatish Balay } \ 150a30f8f8cSSatish Balay if (a->nonew == 1) goto a_noinsert; \ 151d40312a9SBarry Smith if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 152fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 153a30f8f8cSSatish Balay N = nrow++ - 1; \ 154a30f8f8cSSatish Balay /* shift up all the later entries in this row */ \ 155a30f8f8cSSatish Balay for (ii=N; ii>=_i; ii--) { \ 156a30f8f8cSSatish Balay rp[ii+1] = rp[ii]; \ 157a30f8f8cSSatish Balay ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 158a30f8f8cSSatish Balay } \ 159a30f8f8cSSatish Balay if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 160a30f8f8cSSatish Balay rp[_i] = bcol; \ 161a30f8f8cSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 162e56f5c9eSBarry Smith A->nonzerostate++;\ 163a30f8f8cSSatish Balay a_noinsert:; \ 164a30f8f8cSSatish Balay ailen[brow] = nrow; \ 165a30f8f8cSSatish Balay } 166e5e170daSBarry Smith 167d40312a9SBarry Smith #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 168a30f8f8cSSatish Balay { \ 169a30f8f8cSSatish Balay brow = row/bs; \ 170a30f8f8cSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 171a30f8f8cSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 172a30f8f8cSSatish Balay bcol = col/bs; \ 173a30f8f8cSSatish Balay ridx = row % bs; cidx = col % bs; \ 174a30f8f8cSSatish Balay low = 0; high = nrow; \ 175a30f8f8cSSatish Balay while (high-low > 3) { \ 176a30f8f8cSSatish Balay t = (low+high)/2; \ 177a30f8f8cSSatish Balay if (rp[t] > bcol) high = t; \ 178a30f8f8cSSatish Balay else low = t; \ 179a30f8f8cSSatish Balay } \ 180a30f8f8cSSatish Balay for (_i=low; _i<high; _i++) { \ 181a30f8f8cSSatish Balay if (rp[_i] > bcol) break; \ 182a30f8f8cSSatish Balay if (rp[_i] == bcol) { \ 183a30f8f8cSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 184a30f8f8cSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 185a30f8f8cSSatish Balay else *bap = value; \ 186a30f8f8cSSatish Balay goto b_noinsert; \ 187a30f8f8cSSatish Balay } \ 188a30f8f8cSSatish Balay } \ 189a30f8f8cSSatish Balay if (b->nonew == 1) goto b_noinsert; \ 190d40312a9SBarry Smith if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 191fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 192a30f8f8cSSatish Balay N = nrow++ - 1; \ 193a30f8f8cSSatish Balay /* shift up all the later entries in this row */ \ 194a30f8f8cSSatish Balay for (ii=N; ii>=_i; ii--) { \ 195a30f8f8cSSatish Balay rp[ii+1] = rp[ii]; \ 196a30f8f8cSSatish Balay ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 197a30f8f8cSSatish Balay } \ 198a30f8f8cSSatish Balay if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 199a30f8f8cSSatish Balay rp[_i] = bcol; \ 200a30f8f8cSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 201e56f5c9eSBarry Smith B->nonzerostate++;\ 202a30f8f8cSSatish Balay b_noinsert:; \ 203a30f8f8cSSatish Balay bilen[brow] = nrow; \ 204a30f8f8cSSatish Balay } 205a30f8f8cSSatish Balay 206a30f8f8cSSatish Balay /* Only add/insert a(i,j) with i<=j (blocks). 207a30f8f8cSSatish Balay Any a(i,j) with i>j input by user is ingored. 208a30f8f8cSSatish Balay */ 209dd6ea824SBarry Smith PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 210a30f8f8cSSatish Balay { 211a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 212a30f8f8cSSatish Balay MatScalar value; 213ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented; 214dfbe8321SBarry Smith PetscErrorCode ierr; 2151302d50aSBarry Smith PetscInt i,j,row,col; 216d0f46423SBarry Smith PetscInt rstart_orig=mat->rmap->rstart; 217d0f46423SBarry Smith PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 218d0f46423SBarry Smith PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 219a30f8f8cSSatish Balay 220a30f8f8cSSatish Balay /* Some Variables required in the macro */ 221a30f8f8cSSatish Balay Mat A = baij->A; 222a30f8f8cSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 2231302d50aSBarry Smith PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 224a30f8f8cSSatish Balay MatScalar *aa =a->a; 225a30f8f8cSSatish Balay 226a30f8f8cSSatish Balay Mat B = baij->B; 227a30f8f8cSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 2281302d50aSBarry Smith PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 229a30f8f8cSSatish Balay MatScalar *ba =b->a; 230a30f8f8cSSatish Balay 2311302d50aSBarry Smith PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 2321302d50aSBarry Smith PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 233a30f8f8cSSatish Balay MatScalar *ap,*bap; 234a30f8f8cSSatish Balay 235a30f8f8cSSatish Balay /* for stash */ 2360298fd71SBarry Smith PetscInt n_loc, *in_loc = NULL; 2370298fd71SBarry Smith MatScalar *v_loc = NULL; 238a30f8f8cSSatish Balay 239a30f8f8cSSatish Balay PetscFunctionBegin; 240a30f8f8cSSatish Balay if (!baij->donotstash) { 24159ffdab8SBarry Smith if (n > baij->n_loc) { 24259ffdab8SBarry Smith ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 24359ffdab8SBarry Smith ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 244785e854fSJed Brown ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr); 245785e854fSJed Brown ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr); 24626fbe8dcSKarl Rupp 24759ffdab8SBarry Smith baij->n_loc = n; 24859ffdab8SBarry Smith } 24959ffdab8SBarry Smith in_loc = baij->in_loc; 25059ffdab8SBarry Smith v_loc = baij->v_loc; 251a30f8f8cSSatish Balay } 252a30f8f8cSSatish Balay 253a30f8f8cSSatish Balay for (i=0; i<m; i++) { 254a30f8f8cSSatish Balay if (im[i] < 0) continue; 2552515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 256e32f2f54SBarry Smith if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 257a30f8f8cSSatish Balay #endif 258a30f8f8cSSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 259a30f8f8cSSatish Balay row = im[i] - rstart_orig; /* local row index */ 260a30f8f8cSSatish Balay for (j=0; j<n; j++) { 26101b2bd88SHong Zhang if (im[i]/bs > in[j]/bs) { 26201b2bd88SHong Zhang if (a->ignore_ltriangular) { 26301b2bd88SHong Zhang continue; /* ignore lower triangular blocks */ 26426fbe8dcSKarl Rupp } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 26501b2bd88SHong Zhang } 266a30f8f8cSSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 267a30f8f8cSSatish Balay col = in[j] - cstart_orig; /* local col index */ 268a30f8f8cSSatish Balay brow = row/bs; bcol = col/bs; 269a30f8f8cSSatish Balay if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 270db4deed7SKarl Rupp if (roworiented) value = v[i*n+j]; 271db4deed7SKarl Rupp else value = v[i+j*m]; 272d40312a9SBarry Smith MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 273a30f8f8cSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 274a30f8f8cSSatish Balay } else if (in[j] < 0) continue; 2752515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 276cb9801acSJed Brown else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 277a30f8f8cSSatish Balay #endif 278a30f8f8cSSatish Balay else { /* off-diag entry (B) */ 279a30f8f8cSSatish Balay if (mat->was_assembled) { 280a30f8f8cSSatish Balay if (!baij->colmap) { 281ab9863d7SBarry Smith ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 282a30f8f8cSSatish Balay } 283a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE) 284a30f8f8cSSatish Balay ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 28571730473SSatish Balay col = col - 1; 286a30f8f8cSSatish Balay #else 28771730473SSatish Balay col = baij->colmap[in[j]/bs] - 1; 288a30f8f8cSSatish Balay #endif 289a30f8f8cSSatish Balay if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 290ab9863d7SBarry Smith ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 291a30f8f8cSSatish Balay col = in[j]; 292a30f8f8cSSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 293a30f8f8cSSatish Balay B = baij->B; 294a30f8f8cSSatish Balay b = (Mat_SeqBAIJ*)(B)->data; 295a30f8f8cSSatish Balay bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 296a30f8f8cSSatish Balay ba = b->a; 29771730473SSatish Balay } else col += in[j]%bs; 298a30f8f8cSSatish Balay } else col = in[j]; 299db4deed7SKarl Rupp if (roworiented) value = v[i*n+j]; 300db4deed7SKarl Rupp else value = v[i+j*m]; 301d40312a9SBarry Smith MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 302a30f8f8cSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 303a30f8f8cSSatish Balay } 304a30f8f8cSSatish Balay } 305a30f8f8cSSatish Balay } else { /* off processor entry */ 3064cb17eb5SBarry Smith if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 307a30f8f8cSSatish Balay if (!baij->donotstash) { 3085080c13bSMatthew G Knepley mat->assembled = PETSC_FALSE; 309a30f8f8cSSatish Balay n_loc = 0; 310a30f8f8cSSatish Balay for (j=0; j<n; j++) { 311f65c83cfSHong Zhang if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 312a30f8f8cSSatish Balay in_loc[n_loc] = in[j]; 313a30f8f8cSSatish Balay if (roworiented) { 314a30f8f8cSSatish Balay v_loc[n_loc] = v[i*n+j]; 315a30f8f8cSSatish Balay } else { 316a30f8f8cSSatish Balay v_loc[n_loc] = v[j*m+i]; 317a30f8f8cSSatish Balay } 318a30f8f8cSSatish Balay n_loc++; 319a30f8f8cSSatish Balay } 320b400d20cSBarry Smith ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 321a30f8f8cSSatish Balay } 322a30f8f8cSSatish Balay } 323a30f8f8cSSatish Balay } 324a30f8f8cSSatish Balay PetscFunctionReturn(0); 325a30f8f8cSSatish Balay } 326a30f8f8cSSatish Balay 32736bd2089SBarry Smith PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 32836bd2089SBarry Smith { 32936bd2089SBarry Smith Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 33036bd2089SBarry Smith PetscErrorCode ierr; 33136bd2089SBarry Smith PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 33236bd2089SBarry Smith PetscInt *imax =a->imax,*ai=a->i,*ailen=a->ilen; 33336bd2089SBarry Smith PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 33436bd2089SBarry Smith PetscBool roworiented=a->roworiented; 33536bd2089SBarry Smith const PetscScalar *value = v; 33636bd2089SBarry Smith MatScalar *ap,*aa = a->a,*bap; 33736bd2089SBarry Smith 33836bd2089SBarry Smith PetscFunctionBegin; 33936bd2089SBarry Smith if (col < row) { 34036bd2089SBarry Smith if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */ 34136bd2089SBarry Smith else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 34236bd2089SBarry Smith } 34336bd2089SBarry Smith rp = aj + ai[row]; 34436bd2089SBarry Smith ap = aa + bs2*ai[row]; 34536bd2089SBarry Smith rmax = imax[row]; 34636bd2089SBarry Smith nrow = ailen[row]; 34736bd2089SBarry Smith value = v; 34836bd2089SBarry Smith low = 0; 34936bd2089SBarry Smith high = nrow; 35036bd2089SBarry Smith 35136bd2089SBarry Smith while (high-low > 7) { 35236bd2089SBarry Smith t = (low+high)/2; 35336bd2089SBarry Smith if (rp[t] > col) high = t; 35436bd2089SBarry Smith else low = t; 35536bd2089SBarry Smith } 35636bd2089SBarry Smith for (i=low; i<high; i++) { 35736bd2089SBarry Smith if (rp[i] > col) break; 35836bd2089SBarry Smith if (rp[i] == col) { 35936bd2089SBarry Smith bap = ap + bs2*i; 36036bd2089SBarry Smith if (roworiented) { 36136bd2089SBarry Smith if (is == ADD_VALUES) { 36236bd2089SBarry Smith for (ii=0; ii<bs; ii++) { 36336bd2089SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 36436bd2089SBarry Smith bap[jj] += *value++; 36536bd2089SBarry Smith } 36636bd2089SBarry Smith } 36736bd2089SBarry Smith } else { 36836bd2089SBarry Smith for (ii=0; ii<bs; ii++) { 36936bd2089SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 37036bd2089SBarry Smith bap[jj] = *value++; 37136bd2089SBarry Smith } 37236bd2089SBarry Smith } 37336bd2089SBarry Smith } 37436bd2089SBarry Smith } else { 37536bd2089SBarry Smith if (is == ADD_VALUES) { 37636bd2089SBarry Smith for (ii=0; ii<bs; ii++) { 37736bd2089SBarry Smith for (jj=0; jj<bs; jj++) { 37836bd2089SBarry Smith *bap++ += *value++; 37936bd2089SBarry Smith } 38036bd2089SBarry Smith } 38136bd2089SBarry Smith } else { 38236bd2089SBarry Smith for (ii=0; ii<bs; ii++) { 38336bd2089SBarry Smith for (jj=0; jj<bs; jj++) { 38436bd2089SBarry Smith *bap++ = *value++; 38536bd2089SBarry Smith } 38636bd2089SBarry Smith } 38736bd2089SBarry Smith } 38836bd2089SBarry Smith } 38936bd2089SBarry Smith goto noinsert2; 39036bd2089SBarry Smith } 39136bd2089SBarry Smith } 39236bd2089SBarry Smith if (nonew == 1) goto noinsert2; 39336bd2089SBarry Smith if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol); 39436bd2089SBarry Smith MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 39536bd2089SBarry Smith N = nrow++ - 1; high++; 39636bd2089SBarry Smith /* shift up all the later entries in this row */ 39736bd2089SBarry Smith for (ii=N; ii>=i; ii--) { 39836bd2089SBarry Smith rp[ii+1] = rp[ii]; 39936bd2089SBarry Smith ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 40036bd2089SBarry Smith } 40136bd2089SBarry Smith if (N >= i) { 40236bd2089SBarry Smith ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 40336bd2089SBarry Smith } 40436bd2089SBarry Smith rp[i] = col; 40536bd2089SBarry Smith bap = ap + bs2*i; 40636bd2089SBarry Smith if (roworiented) { 40736bd2089SBarry Smith for (ii=0; ii<bs; ii++) { 40836bd2089SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 40936bd2089SBarry Smith bap[jj] = *value++; 41036bd2089SBarry Smith } 41136bd2089SBarry Smith } 41236bd2089SBarry Smith } else { 41336bd2089SBarry Smith for (ii=0; ii<bs; ii++) { 41436bd2089SBarry Smith for (jj=0; jj<bs; jj++) { 41536bd2089SBarry Smith *bap++ = *value++; 41636bd2089SBarry Smith } 41736bd2089SBarry Smith } 41836bd2089SBarry Smith } 41936bd2089SBarry Smith noinsert2:; 42036bd2089SBarry Smith ailen[row] = nrow; 42136bd2089SBarry Smith PetscFunctionReturn(0); 42236bd2089SBarry Smith } 42336bd2089SBarry Smith 42436bd2089SBarry Smith /* 42536bd2089SBarry Smith This routine is exactly duplicated in mpibaij.c 42636bd2089SBarry Smith */ 42736bd2089SBarry Smith PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 42836bd2089SBarry Smith { 42936bd2089SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 43036bd2089SBarry Smith PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 43136bd2089SBarry Smith PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 43236bd2089SBarry Smith PetscErrorCode ierr; 43336bd2089SBarry Smith PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 43436bd2089SBarry Smith PetscBool roworiented=a->roworiented; 43536bd2089SBarry Smith const PetscScalar *value = v; 43636bd2089SBarry Smith MatScalar *ap,*aa = a->a,*bap; 43736bd2089SBarry Smith 43836bd2089SBarry Smith PetscFunctionBegin; 43936bd2089SBarry Smith rp = aj + ai[row]; 44036bd2089SBarry Smith ap = aa + bs2*ai[row]; 44136bd2089SBarry Smith rmax = imax[row]; 44236bd2089SBarry Smith nrow = ailen[row]; 44336bd2089SBarry Smith low = 0; 44436bd2089SBarry Smith high = nrow; 44536bd2089SBarry Smith value = v; 44636bd2089SBarry Smith while (high-low > 7) { 44736bd2089SBarry Smith t = (low+high)/2; 44836bd2089SBarry Smith if (rp[t] > col) high = t; 44936bd2089SBarry Smith else low = t; 45036bd2089SBarry Smith } 45136bd2089SBarry Smith for (i=low; i<high; i++) { 45236bd2089SBarry Smith if (rp[i] > col) break; 45336bd2089SBarry Smith if (rp[i] == col) { 45436bd2089SBarry Smith bap = ap + bs2*i; 45536bd2089SBarry Smith if (roworiented) { 45636bd2089SBarry Smith if (is == ADD_VALUES) { 45736bd2089SBarry Smith for (ii=0; ii<bs; ii++) { 45836bd2089SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 45936bd2089SBarry Smith bap[jj] += *value++; 46036bd2089SBarry Smith } 46136bd2089SBarry Smith } 46236bd2089SBarry Smith } else { 46336bd2089SBarry Smith for (ii=0; ii<bs; ii++) { 46436bd2089SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 46536bd2089SBarry Smith bap[jj] = *value++; 46636bd2089SBarry Smith } 46736bd2089SBarry Smith } 46836bd2089SBarry Smith } 46936bd2089SBarry Smith } else { 47036bd2089SBarry Smith if (is == ADD_VALUES) { 47136bd2089SBarry Smith for (ii=0; ii<bs; ii++,value+=bs) { 47236bd2089SBarry Smith for (jj=0; jj<bs; jj++) { 47336bd2089SBarry Smith bap[jj] += value[jj]; 47436bd2089SBarry Smith } 47536bd2089SBarry Smith bap += bs; 47636bd2089SBarry Smith } 47736bd2089SBarry Smith } else { 47836bd2089SBarry Smith for (ii=0; ii<bs; ii++,value+=bs) { 47936bd2089SBarry Smith for (jj=0; jj<bs; jj++) { 48036bd2089SBarry Smith bap[jj] = value[jj]; 48136bd2089SBarry Smith } 48236bd2089SBarry Smith bap += bs; 48336bd2089SBarry Smith } 48436bd2089SBarry Smith } 48536bd2089SBarry Smith } 48636bd2089SBarry Smith goto noinsert2; 48736bd2089SBarry Smith } 48836bd2089SBarry Smith } 48936bd2089SBarry Smith if (nonew == 1) goto noinsert2; 49036bd2089SBarry Smith if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol); 49136bd2089SBarry Smith MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 49236bd2089SBarry Smith N = nrow++ - 1; high++; 49336bd2089SBarry Smith /* shift up all the later entries in this row */ 49436bd2089SBarry Smith for (ii=N; ii>=i; ii--) { 49536bd2089SBarry Smith rp[ii+1] = rp[ii]; 49636bd2089SBarry Smith ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 49736bd2089SBarry Smith } 49836bd2089SBarry Smith if (N >= i) { 49936bd2089SBarry Smith ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 50036bd2089SBarry Smith } 50136bd2089SBarry Smith rp[i] = col; 50236bd2089SBarry Smith bap = ap + bs2*i; 50336bd2089SBarry Smith if (roworiented) { 50436bd2089SBarry Smith for (ii=0; ii<bs; ii++) { 50536bd2089SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 50636bd2089SBarry Smith bap[jj] = *value++; 50736bd2089SBarry Smith } 50836bd2089SBarry Smith } 50936bd2089SBarry Smith } else { 51036bd2089SBarry Smith for (ii=0; ii<bs; ii++) { 51136bd2089SBarry Smith for (jj=0; jj<bs; jj++) { 51236bd2089SBarry Smith *bap++ = *value++; 51336bd2089SBarry Smith } 51436bd2089SBarry Smith } 51536bd2089SBarry Smith } 51636bd2089SBarry Smith noinsert2:; 51736bd2089SBarry Smith ailen[row] = nrow; 51836bd2089SBarry Smith PetscFunctionReturn(0); 51936bd2089SBarry Smith } 52036bd2089SBarry Smith 52136bd2089SBarry Smith /* 52236bd2089SBarry Smith This routine could be optimized by removing the need for the block copy below and passing stride information 52336bd2089SBarry Smith to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 52436bd2089SBarry Smith */ 525dd6ea824SBarry Smith PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 526a30f8f8cSSatish Balay { 5270880e062SHong Zhang Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 528f15d580aSBarry Smith const MatScalar *value; 529f15d580aSBarry Smith MatScalar *barray =baij->barray; 530ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 531dfbe8321SBarry Smith PetscErrorCode ierr; 532899cda47SBarry Smith PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 533899cda47SBarry Smith PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 534d0f46423SBarry Smith PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 5350880e062SHong Zhang 536a30f8f8cSSatish Balay PetscFunctionBegin; 5370880e062SHong Zhang if (!barray) { 538785e854fSJed Brown ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 5390880e062SHong Zhang baij->barray = barray; 5400880e062SHong Zhang } 5410880e062SHong Zhang 5420880e062SHong Zhang if (roworiented) { 5430880e062SHong Zhang stepval = (n-1)*bs; 5440880e062SHong Zhang } else { 5450880e062SHong Zhang stepval = (m-1)*bs; 5460880e062SHong Zhang } 5470880e062SHong Zhang for (i=0; i<m; i++) { 5480880e062SHong Zhang if (im[i] < 0) continue; 5492515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 550bb003d0fSBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1); 5510880e062SHong Zhang #endif 5520880e062SHong Zhang if (im[i] >= rstart && im[i] < rend) { 5530880e062SHong Zhang row = im[i] - rstart; 5540880e062SHong Zhang for (j=0; j<n; j++) { 555f3f98c53SJed Brown if (im[i] > in[j]) { 556f3f98c53SJed Brown if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 557e32f2f54SBarry Smith else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 558f3f98c53SJed Brown } 5590880e062SHong Zhang /* If NumCol = 1 then a copy is not required */ 5600880e062SHong Zhang if ((roworiented) && (n == 1)) { 561f15d580aSBarry Smith barray = (MatScalar*) v + i*bs2; 5620880e062SHong Zhang } else if ((!roworiented) && (m == 1)) { 563f15d580aSBarry Smith barray = (MatScalar*) v + j*bs2; 5640880e062SHong Zhang } else { /* Here a copy is required */ 5650880e062SHong Zhang if (roworiented) { 5660880e062SHong Zhang value = v + i*(stepval+bs)*bs + j*bs; 5670880e062SHong Zhang } else { 5680880e062SHong Zhang value = v + j*(stepval+bs)*bs + i*bs; 5690880e062SHong Zhang } 5700880e062SHong Zhang for (ii=0; ii<bs; ii++,value+=stepval) { 5710880e062SHong Zhang for (jj=0; jj<bs; jj++) { 5720880e062SHong Zhang *barray++ = *value++; 5730880e062SHong Zhang } 5740880e062SHong Zhang } 5750880e062SHong Zhang barray -=bs2; 5760880e062SHong Zhang } 5770880e062SHong Zhang 5780880e062SHong Zhang if (in[j] >= cstart && in[j] < cend) { 5790880e062SHong Zhang col = in[j] - cstart; 58036bd2089SBarry Smith ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 58126fbe8dcSKarl Rupp } else if (in[j] < 0) continue; 5822515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 583bb003d0fSBarry Smith else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1); 5840880e062SHong Zhang #endif 5850880e062SHong Zhang else { 5860880e062SHong Zhang if (mat->was_assembled) { 5870880e062SHong Zhang if (!baij->colmap) { 588ab9863d7SBarry Smith ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 5890880e062SHong Zhang } 5900880e062SHong Zhang 5912515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 5920880e062SHong Zhang #if defined(PETSC_USE_CTABLE) 5931302d50aSBarry Smith { PetscInt data; 5940880e062SHong Zhang ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 595e32f2f54SBarry Smith if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 5960880e062SHong Zhang } 5970880e062SHong Zhang #else 598e32f2f54SBarry Smith if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 5990880e062SHong Zhang #endif 6000880e062SHong Zhang #endif 6010880e062SHong Zhang #if defined(PETSC_USE_CTABLE) 6020880e062SHong Zhang ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6030880e062SHong Zhang col = (col - 1)/bs; 6040880e062SHong Zhang #else 6050880e062SHong Zhang col = (baij->colmap[in[j]] - 1)/bs; 6060880e062SHong Zhang #endif 6070880e062SHong Zhang if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 608ab9863d7SBarry Smith ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 6090880e062SHong Zhang col = in[j]; 6100880e062SHong Zhang } 61126fbe8dcSKarl Rupp } else col = in[j]; 61236bd2089SBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 6130880e062SHong Zhang } 6140880e062SHong Zhang } 6150880e062SHong Zhang } else { 616bb003d0fSBarry Smith if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 6170880e062SHong Zhang if (!baij->donotstash) { 6180880e062SHong Zhang if (roworiented) { 6190880e062SHong Zhang ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 6200880e062SHong Zhang } else { 6210880e062SHong Zhang ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 6220880e062SHong Zhang } 6230880e062SHong Zhang } 6240880e062SHong Zhang } 6250880e062SHong Zhang } 6260880e062SHong Zhang PetscFunctionReturn(0); 627a30f8f8cSSatish Balay } 628a30f8f8cSSatish Balay 6291302d50aSBarry Smith PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 630a30f8f8cSSatish Balay { 631f3566a2aSHong Zhang Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 6326849ba73SBarry Smith PetscErrorCode ierr; 633d0f46423SBarry Smith PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 634d0f46423SBarry Smith PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 635a30f8f8cSSatish Balay 636a30f8f8cSSatish Balay PetscFunctionBegin; 637a30f8f8cSSatish Balay for (i=0; i<m; i++) { 638e32f2f54SBarry Smith if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 639e32f2f54SBarry Smith if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 640a30f8f8cSSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 641a30f8f8cSSatish Balay row = idxm[i] - bsrstart; 642a30f8f8cSSatish Balay for (j=0; j<n; j++) { 643e32f2f54SBarry Smith if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 644e32f2f54SBarry Smith if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 645a30f8f8cSSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend) { 646a30f8f8cSSatish Balay col = idxn[j] - bscstart; 647c8407628SSatish Balay ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 648a30f8f8cSSatish Balay } else { 649a30f8f8cSSatish Balay if (!baij->colmap) { 650ab9863d7SBarry Smith ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 651a30f8f8cSSatish Balay } 652a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE) 653a30f8f8cSSatish Balay ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 654a30f8f8cSSatish Balay data--; 655a30f8f8cSSatish Balay #else 656a30f8f8cSSatish Balay data = baij->colmap[idxn[j]/bs]-1; 657a30f8f8cSSatish Balay #endif 658a30f8f8cSSatish Balay if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 659a30f8f8cSSatish Balay else { 660a30f8f8cSSatish Balay col = data + idxn[j]%bs; 661e249d750SSatish Balay ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 662a30f8f8cSSatish Balay } 663a30f8f8cSSatish Balay } 664a30f8f8cSSatish Balay } 665f23aa3ddSBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 666a30f8f8cSSatish Balay } 667a30f8f8cSSatish Balay PetscFunctionReturn(0); 668a30f8f8cSSatish Balay } 669a30f8f8cSSatish Balay 670dfbe8321SBarry Smith PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 671a30f8f8cSSatish Balay { 672a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 673dfbe8321SBarry Smith PetscErrorCode ierr; 674a30f8f8cSSatish Balay PetscReal sum[2],*lnorm2; 675a30f8f8cSSatish Balay 676a30f8f8cSSatish Balay PetscFunctionBegin; 677a30f8f8cSSatish Balay if (baij->size == 1) { 678a30f8f8cSSatish Balay ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 679a30f8f8cSSatish Balay } else { 680a30f8f8cSSatish Balay if (type == NORM_FROBENIUS) { 681785e854fSJed Brown ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr); 682a30f8f8cSSatish Balay ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 683a30f8f8cSSatish Balay *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 684a30f8f8cSSatish Balay ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 685a30f8f8cSSatish Balay *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 686b2566f29SBarry Smith ierr = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6878f1a2a5eSBarry Smith *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 688a30f8f8cSSatish Balay ierr = PetscFree(lnorm2);CHKERRQ(ierr); 6890b8dc8d2SHong Zhang } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 6900b8dc8d2SHong Zhang Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 6910b8dc8d2SHong Zhang Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 6920b8dc8d2SHong Zhang PetscReal *rsum,*rsum2,vabs; 693899cda47SBarry Smith PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 694d0f46423SBarry Smith PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 6950b8dc8d2SHong Zhang MatScalar *v; 6960b8dc8d2SHong Zhang 697dcca6d9dSJed Brown ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr); 698d0f46423SBarry Smith ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 6990b8dc8d2SHong Zhang /* Amat */ 7000b8dc8d2SHong Zhang v = amat->a; jj = amat->j; 7010b8dc8d2SHong Zhang for (brow=0; brow<mbs; brow++) { 7020b8dc8d2SHong Zhang grow = bs*(rstart + brow); 7030b8dc8d2SHong Zhang nz = amat->i[brow+1] - amat->i[brow]; 7040b8dc8d2SHong Zhang for (bcol=0; bcol<nz; bcol++) { 7050b8dc8d2SHong Zhang gcol = bs*(rstart + *jj); jj++; 7060b8dc8d2SHong Zhang for (col=0; col<bs; col++) { 7070b8dc8d2SHong Zhang for (row=0; row<bs; row++) { 7080b8dc8d2SHong Zhang vabs = PetscAbsScalar(*v); v++; 7090b8dc8d2SHong Zhang rsum[gcol+col] += vabs; 7100b8dc8d2SHong Zhang /* non-diagonal block */ 7110b8dc8d2SHong Zhang if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 7120b8dc8d2SHong Zhang } 7130b8dc8d2SHong Zhang } 7140b8dc8d2SHong Zhang } 71551f70360SJed Brown ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 7160b8dc8d2SHong Zhang } 7170b8dc8d2SHong Zhang /* Bmat */ 7180b8dc8d2SHong Zhang v = bmat->a; jj = bmat->j; 7190b8dc8d2SHong Zhang for (brow=0; brow<mbs; brow++) { 7200b8dc8d2SHong Zhang grow = bs*(rstart + brow); 7210b8dc8d2SHong Zhang nz = bmat->i[brow+1] - bmat->i[brow]; 7220b8dc8d2SHong Zhang for (bcol=0; bcol<nz; bcol++) { 7230b8dc8d2SHong Zhang gcol = bs*garray[*jj]; jj++; 7240b8dc8d2SHong Zhang for (col=0; col<bs; col++) { 7250b8dc8d2SHong Zhang for (row=0; row<bs; row++) { 7260b8dc8d2SHong Zhang vabs = PetscAbsScalar(*v); v++; 7270b8dc8d2SHong Zhang rsum[gcol+col] += vabs; 7280b8dc8d2SHong Zhang rsum[grow+row] += vabs; 7290b8dc8d2SHong Zhang } 7300b8dc8d2SHong Zhang } 7310b8dc8d2SHong Zhang } 73251f70360SJed Brown ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 7330b8dc8d2SHong Zhang } 734b2566f29SBarry Smith ierr = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 7350b8dc8d2SHong Zhang *norm = 0.0; 736d0f46423SBarry Smith for (col=0; col<mat->cmap->N; col++) { 7370b8dc8d2SHong Zhang if (rsum2[col] > *norm) *norm = rsum2[col]; 7380b8dc8d2SHong Zhang } 73974ed9c26SBarry Smith ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 740f23aa3ddSBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 741a30f8f8cSSatish Balay } 742a30f8f8cSSatish Balay PetscFunctionReturn(0); 743a30f8f8cSSatish Balay } 744a30f8f8cSSatish Balay 745dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 746a30f8f8cSSatish Balay { 747a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 748dfbe8321SBarry Smith PetscErrorCode ierr; 7491302d50aSBarry Smith PetscInt nstash,reallocs; 750a30f8f8cSSatish Balay 751a30f8f8cSSatish Balay PetscFunctionBegin; 75226fbe8dcSKarl Rupp if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 753a30f8f8cSSatish Balay 754d0f46423SBarry Smith ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 7551e2582c4SBarry Smith ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 756a30f8f8cSSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 7571e2582c4SBarry Smith ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 758a30f8f8cSSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 7591e2582c4SBarry Smith ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 760a30f8f8cSSatish Balay PetscFunctionReturn(0); 761a30f8f8cSSatish Balay } 762a30f8f8cSSatish Balay 763dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 764a30f8f8cSSatish Balay { 765a30f8f8cSSatish Balay Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 766a30f8f8cSSatish Balay Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data; 7676849ba73SBarry Smith PetscErrorCode ierr; 76813f74950SBarry Smith PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 769e44c0bd4SBarry Smith PetscInt *row,*col; 770ace3abfcSBarry Smith PetscBool other_disassembled; 77113f74950SBarry Smith PetscMPIInt n; 772ace3abfcSBarry Smith PetscBool r1,r2,r3; 773a30f8f8cSSatish Balay MatScalar *val; 774a30f8f8cSSatish Balay 77591c97fd4SSatish Balay /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 776a30f8f8cSSatish Balay PetscFunctionBegin; 7774cb17eb5SBarry Smith if (!baij->donotstash && !mat->nooffprocentries) { 778a30f8f8cSSatish Balay while (1) { 779a30f8f8cSSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 780a30f8f8cSSatish Balay if (!flg) break; 781a30f8f8cSSatish Balay 782a30f8f8cSSatish Balay for (i=0; i<n;) { 783a30f8f8cSSatish Balay /* Now identify the consecutive vals belonging to the same row */ 78426fbe8dcSKarl Rupp for (j=i,rstart=row[j]; j<n; j++) { 78526fbe8dcSKarl Rupp if (row[j] != rstart) break; 78626fbe8dcSKarl Rupp } 787a30f8f8cSSatish Balay if (j < n) ncols = j-i; 788a30f8f8cSSatish Balay else ncols = n-i; 789a30f8f8cSSatish Balay /* Now assemble all these values with a single function call */ 7904b4eb8d3SJed Brown ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 791a30f8f8cSSatish Balay i = j; 792a30f8f8cSSatish Balay } 793a30f8f8cSSatish Balay } 794a30f8f8cSSatish Balay ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 795a30f8f8cSSatish Balay /* Now process the block-stash. Since the values are stashed column-oriented, 796a30f8f8cSSatish Balay set the roworiented flag to column oriented, and after MatSetValues() 797a30f8f8cSSatish Balay restore the original flags */ 798a30f8f8cSSatish Balay r1 = baij->roworiented; 799a30f8f8cSSatish Balay r2 = a->roworiented; 80091c97fd4SSatish Balay r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 80126fbe8dcSKarl Rupp 802a30f8f8cSSatish Balay baij->roworiented = PETSC_FALSE; 803a30f8f8cSSatish Balay a->roworiented = PETSC_FALSE; 80426fbe8dcSKarl Rupp 80591c97fd4SSatish Balay ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 806a30f8f8cSSatish Balay while (1) { 807a30f8f8cSSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 808a30f8f8cSSatish Balay if (!flg) break; 809a30f8f8cSSatish Balay 810a30f8f8cSSatish Balay for (i=0; i<n;) { 811a30f8f8cSSatish Balay /* Now identify the consecutive vals belonging to the same row */ 81226fbe8dcSKarl Rupp for (j=i,rstart=row[j]; j<n; j++) { 81326fbe8dcSKarl Rupp if (row[j] != rstart) break; 81426fbe8dcSKarl Rupp } 815a30f8f8cSSatish Balay if (j < n) ncols = j-i; 816a30f8f8cSSatish Balay else ncols = n-i; 8174b4eb8d3SJed Brown ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr); 818a30f8f8cSSatish Balay i = j; 819a30f8f8cSSatish Balay } 820a30f8f8cSSatish Balay } 821a30f8f8cSSatish Balay ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 82226fbe8dcSKarl Rupp 823a30f8f8cSSatish Balay baij->roworiented = r1; 824a30f8f8cSSatish Balay a->roworiented = r2; 82526fbe8dcSKarl Rupp 82691c97fd4SSatish Balay ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 827a30f8f8cSSatish Balay } 828a30f8f8cSSatish Balay 829a30f8f8cSSatish Balay ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 830a30f8f8cSSatish Balay ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 831a30f8f8cSSatish Balay 832a30f8f8cSSatish Balay /* determine if any processor has disassembled, if so we must 833a30f8f8cSSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 834a30f8f8cSSatish Balay /* 835a30f8f8cSSatish Balay if nonzero structure of submatrix B cannot change then we know that 836a30f8f8cSSatish Balay no processor disassembled thus we can skip this stuff 837a30f8f8cSSatish Balay */ 838a30f8f8cSSatish Balay if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 839b2566f29SBarry Smith ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 840a30f8f8cSSatish Balay if (mat->was_assembled && !other_disassembled) { 841ab9863d7SBarry Smith ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 842a30f8f8cSSatish Balay } 843a30f8f8cSSatish Balay } 844a30f8f8cSSatish Balay 845a30f8f8cSSatish Balay if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 84640781036SHong Zhang ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 847a30f8f8cSSatish Balay } 848a30f8f8cSSatish Balay ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 849a30f8f8cSSatish Balay ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 850a30f8f8cSSatish Balay 85174ed9c26SBarry Smith ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 85226fbe8dcSKarl Rupp 853a30f8f8cSSatish Balay baij->rowvalues = 0; 8544f9cfa9eSBarry Smith 8554f9cfa9eSBarry Smith /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 8564f9cfa9eSBarry Smith if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 857e56f5c9eSBarry Smith PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 858b2566f29SBarry Smith ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 859e56f5c9eSBarry Smith } 860a30f8f8cSSatish Balay PetscFunctionReturn(0); 861a30f8f8cSSatish Balay } 862a30f8f8cSSatish Balay 863dd6ea824SBarry Smith extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 8649804daf3SBarry Smith #include <petscdraw.h> 8656849ba73SBarry Smith static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 866a30f8f8cSSatish Balay { 867a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 868dfbe8321SBarry Smith PetscErrorCode ierr; 869d0f46423SBarry Smith PetscInt bs = mat->rmap->bs; 8707da1fb6eSBarry Smith PetscMPIInt rank = baij->rank; 871ace3abfcSBarry Smith PetscBool iascii,isdraw; 872b0a32e0cSBarry Smith PetscViewer sviewer; 873f3ef73ceSBarry Smith PetscViewerFormat format; 874a30f8f8cSSatish Balay 875a30f8f8cSSatish Balay PetscFunctionBegin; 876251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 877251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 87832077d6dSBarry Smith if (iascii) { 879b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 880456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 881a30f8f8cSSatish Balay MatInfo info; 882ce94432eSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 883a30f8f8cSSatish Balay ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 8841575c14dSBarry Smith ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 885b1e9c6f1SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr); 886a30f8f8cSSatish Balay ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 887e6dd01d4SJed Brown ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 888a30f8f8cSSatish Balay ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 889e6dd01d4SJed Brown ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 890b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 8911575c14dSBarry Smith ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 89207d81ca4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 893a30f8f8cSSatish Balay ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 894a30f8f8cSSatish Balay PetscFunctionReturn(0); 895fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_INFO) { 89677431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 897a30f8f8cSSatish Balay PetscFunctionReturn(0); 898c1490034SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 899c1490034SHong Zhang PetscFunctionReturn(0); 900a30f8f8cSSatish Balay } 901a30f8f8cSSatish Balay } 902a30f8f8cSSatish Balay 903a30f8f8cSSatish Balay if (isdraw) { 904b0a32e0cSBarry Smith PetscDraw draw; 905ace3abfcSBarry Smith PetscBool isnull; 906b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 90745f3bb6eSLisandro Dalcin ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 90845f3bb6eSLisandro Dalcin if (isnull) PetscFunctionReturn(0); 909a30f8f8cSSatish Balay } 910a30f8f8cSSatish Balay 9117da1fb6eSBarry Smith { 912a30f8f8cSSatish Balay /* assemble the entire matrix onto first processor. */ 913a30f8f8cSSatish Balay Mat A; 91465d70643SHong Zhang Mat_SeqSBAIJ *Aloc; 91565d70643SHong Zhang Mat_SeqBAIJ *Bloc; 916d0f46423SBarry Smith PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 917a30f8f8cSSatish Balay MatScalar *a; 9183e219373SBarry Smith const char *matname; 919a30f8f8cSSatish Balay 920f204ca49SKris Buschelman /* Should this be the same type as mat? */ 921ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 922a30f8f8cSSatish Balay if (!rank) { 923f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 924a30f8f8cSSatish Balay } else { 925f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 926a30f8f8cSSatish Balay } 927f204ca49SKris Buschelman ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 9280298fd71SBarry Smith ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 9292b82e772SSatish Balay ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 9303bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 931a30f8f8cSSatish Balay 932a30f8f8cSSatish Balay /* copy over the A part */ 93365d70643SHong Zhang Aloc = (Mat_SeqSBAIJ*)baij->A->data; 934a30f8f8cSSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 935785e854fSJed Brown ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 936a30f8f8cSSatish Balay 937a30f8f8cSSatish Balay for (i=0; i<mbs; i++) { 938e9f7bc9eSHong Zhang rvals[0] = bs*(baij->rstartbs + i); 93926fbe8dcSKarl Rupp for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 940a30f8f8cSSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 941e9f7bc9eSHong Zhang col = (baij->cstartbs+aj[j])*bs; 942a30f8f8cSSatish Balay for (k=0; k<bs; k++) { 943dd6ea824SBarry Smith ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 94426fbe8dcSKarl Rupp col++; 94526fbe8dcSKarl Rupp a += bs; 946a30f8f8cSSatish Balay } 947a30f8f8cSSatish Balay } 948a30f8f8cSSatish Balay } 949a30f8f8cSSatish Balay /* copy over the B part */ 95065d70643SHong Zhang Bloc = (Mat_SeqBAIJ*)baij->B->data; 95165d70643SHong Zhang ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 952a30f8f8cSSatish Balay for (i=0; i<mbs; i++) { 953e9f7bc9eSHong Zhang 954e9f7bc9eSHong Zhang rvals[0] = bs*(baij->rstartbs + i); 95526fbe8dcSKarl Rupp for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 956a30f8f8cSSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 957a30f8f8cSSatish Balay col = baij->garray[aj[j]]*bs; 958a30f8f8cSSatish Balay for (k=0; k<bs; k++) { 959799bb49cSHong Zhang ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 96026fbe8dcSKarl Rupp col++; 96126fbe8dcSKarl Rupp a += bs; 962a30f8f8cSSatish Balay } 963a30f8f8cSSatish Balay } 964a30f8f8cSSatish Balay } 965a30f8f8cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 966a30f8f8cSSatish Balay ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 967a30f8f8cSSatish Balay ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 968a30f8f8cSSatish Balay /* 969a30f8f8cSSatish Balay Everyone has to call to draw the matrix since the graphics waits are 970b0a32e0cSBarry Smith synchronized across all processors that share the PetscDraw object 971a30f8f8cSSatish Balay */ 9723f08860eSBarry Smith ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 973ade3a672SBarry Smith ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 9743e219373SBarry Smith if (!rank) { 975ade3a672SBarry Smith ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 976383922c3SLisandro Dalcin ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 977a30f8f8cSSatish Balay } 9783f08860eSBarry Smith ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 9791575c14dSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 9806bf464f9SBarry Smith ierr = MatDestroy(&A);CHKERRQ(ierr); 981a30f8f8cSSatish Balay } 982a30f8f8cSSatish Balay PetscFunctionReturn(0); 983a30f8f8cSSatish Balay } 984a30f8f8cSSatish Balay 985d1654148SHong Zhang static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer) 986d1654148SHong Zhang { 987d1654148SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)mat->data; 988d1654148SHong Zhang Mat_SeqSBAIJ *A = (Mat_SeqSBAIJ*)a->A->data; 989d1654148SHong Zhang Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data; 990d1654148SHong Zhang PetscErrorCode ierr; 991d1654148SHong Zhang PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 992d1654148SHong Zhang PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 993d1654148SHong Zhang int fd; 994d1654148SHong Zhang PetscScalar *column_values; 995d1654148SHong Zhang FILE *file; 996d1654148SHong Zhang PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 997d1654148SHong Zhang PetscInt message_count,flowcontrolcount; 998d1654148SHong Zhang 999d1654148SHong Zhang PetscFunctionBegin; 1000d1654148SHong Zhang ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1001d1654148SHong Zhang ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1002d1654148SHong Zhang nz = bs2*(A->nz + B->nz); 1003d1654148SHong Zhang rlen = mat->rmap->n; 1004d1654148SHong Zhang ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1005d1654148SHong Zhang if (!rank) { 1006d1654148SHong Zhang header[0] = MAT_FILE_CLASSID; 1007d1654148SHong Zhang header[1] = mat->rmap->N; 1008d1654148SHong Zhang header[2] = mat->cmap->N; 1009d1654148SHong Zhang 1010d1654148SHong Zhang ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1011d1654148SHong Zhang ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1012d1654148SHong Zhang /* get largest number of rows any processor has */ 1013d1654148SHong Zhang range = mat->rmap->range; 1014d1654148SHong Zhang for (i=1; i<size; i++) { 1015d1654148SHong Zhang rlen = PetscMax(rlen,range[i+1] - range[i]); 1016d1654148SHong Zhang } 1017d1654148SHong Zhang } else { 1018d1654148SHong Zhang ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1019d1654148SHong Zhang } 1020d1654148SHong Zhang 1021d1654148SHong Zhang ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr); 1022d1654148SHong Zhang /* compute lengths of each row */ 1023d1654148SHong Zhang for (i=0; i<a->mbs; i++) { 1024d1654148SHong Zhang crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1025d1654148SHong Zhang } 1026d1654148SHong Zhang /* store the row lengths to the file */ 1027d1654148SHong Zhang ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1028d1654148SHong Zhang if (!rank) { 1029d1654148SHong Zhang MPI_Status status; 1030d1654148SHong Zhang ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr); 1031d1654148SHong Zhang rlen = (range[1] - range[0])/bs; 1032d1654148SHong Zhang for (i=0; i<rlen; i++) { 1033d1654148SHong Zhang for (j=0; j<bs; j++) { 1034d1654148SHong Zhang row_lens[i*bs+j] = bs*crow_lens[i]; 1035d1654148SHong Zhang } 1036d1654148SHong Zhang } 1037d1654148SHong Zhang ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1038d1654148SHong Zhang for (i=1; i<size; i++) { 1039d1654148SHong Zhang rlen = (range[i+1] - range[i])/bs; 1040d1654148SHong Zhang ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1041d1654148SHong Zhang ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1042d1654148SHong Zhang for (k=0; k<rlen; k++) { 1043d1654148SHong Zhang for (j=0; j<bs; j++) { 1044d1654148SHong Zhang row_lens[k*bs+j] = bs*crow_lens[k]; 1045d1654148SHong Zhang } 1046d1654148SHong Zhang } 1047d1654148SHong Zhang ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1048d1654148SHong Zhang } 1049d1654148SHong Zhang ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1050d1654148SHong Zhang ierr = PetscFree(row_lens);CHKERRQ(ierr); 1051d1654148SHong Zhang } else { 1052d1654148SHong Zhang ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1053d1654148SHong Zhang ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1054d1654148SHong Zhang ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1055d1654148SHong Zhang } 1056d1654148SHong Zhang ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1057d1654148SHong Zhang 1058d1654148SHong Zhang /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1059d1654148SHong Zhang information needed to make it for each row from a block row. This does require more communication but still not more than 1060d1654148SHong Zhang the communication needed for the nonzero values */ 1061d1654148SHong Zhang nzmax = nz; /* space a largest processor needs */ 1062d1654148SHong Zhang ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1063d1654148SHong Zhang ierr = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr); 1064d1654148SHong Zhang cnt = 0; 1065d1654148SHong Zhang for (i=0; i<a->mbs; i++) { 1066d1654148SHong Zhang pcnt = cnt; 1067d1654148SHong Zhang for (j=B->i[i]; j<B->i[i+1]; j++) { 1068d1654148SHong Zhang if ((col = garray[B->j[j]]) > cstart) break; 1069d1654148SHong Zhang for (l=0; l<bs; l++) { 1070d1654148SHong Zhang column_indices[cnt++] = bs*col+l; 1071d1654148SHong Zhang } 1072d1654148SHong Zhang } 1073d1654148SHong Zhang for (k=A->i[i]; k<A->i[i+1]; k++) { 1074d1654148SHong Zhang for (l=0; l<bs; l++) { 1075d1654148SHong Zhang column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1076d1654148SHong Zhang } 1077d1654148SHong Zhang } 1078d1654148SHong Zhang for (; j<B->i[i+1]; j++) { 1079d1654148SHong Zhang for (l=0; l<bs; l++) { 1080d1654148SHong Zhang column_indices[cnt++] = bs*garray[B->j[j]]+l; 1081d1654148SHong Zhang } 1082d1654148SHong Zhang } 1083d1654148SHong Zhang len = cnt - pcnt; 1084d1654148SHong Zhang for (k=1; k<bs; k++) { 1085d1654148SHong Zhang ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1086d1654148SHong Zhang cnt += len; 1087d1654148SHong Zhang } 1088d1654148SHong Zhang } 1089d1654148SHong Zhang if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1090d1654148SHong Zhang 1091d1654148SHong Zhang /* store the columns to the file */ 1092d1654148SHong Zhang ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1093d1654148SHong Zhang if (!rank) { 1094d1654148SHong Zhang MPI_Status status; 1095d1654148SHong Zhang ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1096d1654148SHong Zhang for (i=1; i<size; i++) { 1097d1654148SHong Zhang ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1098d1654148SHong Zhang ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1099d1654148SHong Zhang ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1100d1654148SHong Zhang ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1101d1654148SHong Zhang } 1102d1654148SHong Zhang ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1103d1654148SHong Zhang } else { 1104d1654148SHong Zhang ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1105d1654148SHong Zhang ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1106d1654148SHong Zhang ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1107d1654148SHong Zhang ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1108d1654148SHong Zhang } 1109d1654148SHong Zhang ierr = PetscFree(column_indices);CHKERRQ(ierr); 1110d1654148SHong Zhang 1111d1654148SHong Zhang /* load up the numerical values */ 1112d1654148SHong Zhang ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr); 1113d1654148SHong Zhang cnt = 0; 1114d1654148SHong Zhang for (i=0; i<a->mbs; i++) { 1115d1654148SHong Zhang rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1116d1654148SHong Zhang for (j=B->i[i]; j<B->i[i+1]; j++) { 1117d1654148SHong Zhang if (garray[B->j[j]] > cstart) break; 1118d1654148SHong Zhang for (l=0; l<bs; l++) { 1119d1654148SHong Zhang for (ll=0; ll<bs; ll++) { 1120d1654148SHong Zhang column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1121d1654148SHong Zhang } 1122d1654148SHong Zhang } 1123d1654148SHong Zhang cnt += bs; 1124d1654148SHong Zhang } 1125d1654148SHong Zhang for (k=A->i[i]; k<A->i[i+1]; k++) { 1126d1654148SHong Zhang for (l=0; l<bs; l++) { 1127d1654148SHong Zhang for (ll=0; ll<bs; ll++) { 1128d1654148SHong Zhang column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1129d1654148SHong Zhang } 1130d1654148SHong Zhang } 1131d1654148SHong Zhang cnt += bs; 1132d1654148SHong Zhang } 1133d1654148SHong Zhang for (; j<B->i[i+1]; j++) { 1134d1654148SHong Zhang for (l=0; l<bs; l++) { 1135d1654148SHong Zhang for (ll=0; ll<bs; ll++) { 1136d1654148SHong Zhang column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1137d1654148SHong Zhang } 1138d1654148SHong Zhang } 1139d1654148SHong Zhang cnt += bs; 1140d1654148SHong Zhang } 1141d1654148SHong Zhang cnt += (bs-1)*rlen; 1142d1654148SHong Zhang } 1143d1654148SHong Zhang if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1144d1654148SHong Zhang 1145d1654148SHong Zhang /* store the column values to the file */ 1146d1654148SHong Zhang ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1147d1654148SHong Zhang if (!rank) { 1148d1654148SHong Zhang MPI_Status status; 1149d1654148SHong Zhang ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1150d1654148SHong Zhang for (i=1; i<size; i++) { 1151d1654148SHong Zhang ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1152d1654148SHong Zhang ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1153d1654148SHong Zhang ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1154d1654148SHong Zhang ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1155d1654148SHong Zhang } 1156d1654148SHong Zhang ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1157d1654148SHong Zhang } else { 1158d1654148SHong Zhang ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1159d1654148SHong Zhang ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1160d1654148SHong Zhang ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1161d1654148SHong Zhang ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1162d1654148SHong Zhang } 1163d1654148SHong Zhang ierr = PetscFree(column_values);CHKERRQ(ierr); 1164d1654148SHong Zhang 1165d1654148SHong Zhang ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1166d1654148SHong Zhang if (file) { 1167d1654148SHong Zhang fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1168d1654148SHong Zhang } 1169d1654148SHong Zhang PetscFunctionReturn(0); 1170d1654148SHong Zhang } 1171d1654148SHong Zhang 1172dfbe8321SBarry Smith PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 1173a30f8f8cSSatish Balay { 1174dfbe8321SBarry Smith PetscErrorCode ierr; 1175ace3abfcSBarry Smith PetscBool iascii,isdraw,issocket,isbinary; 1176a30f8f8cSSatish Balay 1177a30f8f8cSSatish Balay PetscFunctionBegin; 1178251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1179251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1180251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1181251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1182d1654148SHong Zhang if (iascii || isdraw || issocket) { 1183a30f8f8cSSatish Balay ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1184d1654148SHong Zhang } else if (isbinary) { 1185d1654148SHong Zhang ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1186a30f8f8cSSatish Balay } 1187a30f8f8cSSatish Balay PetscFunctionReturn(0); 1188a30f8f8cSSatish Balay } 1189a30f8f8cSSatish Balay 1190dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 1191a30f8f8cSSatish Balay { 1192a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1193dfbe8321SBarry Smith PetscErrorCode ierr; 1194a30f8f8cSSatish Balay 1195a30f8f8cSSatish Balay PetscFunctionBegin; 1196a30f8f8cSSatish Balay #if defined(PETSC_USE_LOG) 1197d0f46423SBarry Smith PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1198a30f8f8cSSatish Balay #endif 1199a30f8f8cSSatish Balay ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1200a30f8f8cSSatish Balay ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 12016bf464f9SBarry Smith ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 12026bf464f9SBarry Smith ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1203a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE) 12046bc0bbbfSBarry Smith ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1205a30f8f8cSSatish Balay #else 120605b42c5fSBarry Smith ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1207a30f8f8cSSatish Balay #endif 120805b42c5fSBarry Smith ierr = PetscFree(baij->garray);CHKERRQ(ierr); 12096bf464f9SBarry Smith ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 12106bf464f9SBarry Smith ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 12116bf464f9SBarry Smith ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 12126bf464f9SBarry Smith ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 12136bf464f9SBarry Smith ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 12146bf464f9SBarry Smith ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 12156bf464f9SBarry Smith ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 12166bf464f9SBarry Smith ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 12175755ff91SHong Zhang ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 121805b42c5fSBarry Smith ierr = PetscFree(baij->barray);CHKERRQ(ierr); 121905b42c5fSBarry Smith ierr = PetscFree(baij->hd);CHKERRQ(ierr); 12206bf464f9SBarry Smith ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 12216bf464f9SBarry Smith ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 12226bf464f9SBarry Smith ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 1223ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE) 122405b42c5fSBarry Smith ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 1225a30f8f8cSSatish Balay #endif 122659ffdab8SBarry Smith ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 122759ffdab8SBarry Smith ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 1228899cda47SBarry Smith ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1229bf0cc555SLisandro Dalcin ierr = PetscFree(mat->data);CHKERRQ(ierr); 1230901853e0SKris Buschelman 1231dbd8c25aSHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1232bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1233bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1234bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 12356214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 12366214f412SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr); 12376214f412SHong Zhang #endif 1238b147fbf3SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);CHKERRQ(ierr); 1239b147fbf3SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);CHKERRQ(ierr); 1240a30f8f8cSSatish Balay PetscFunctionReturn(0); 1241a30f8f8cSSatish Balay } 1242a30f8f8cSSatish Balay 1243547795f9SHong Zhang PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 1244547795f9SHong Zhang { 1245547795f9SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1246547795f9SHong Zhang PetscErrorCode ierr; 1247547795f9SHong Zhang PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 12486de40e93SBarry Smith PetscScalar *from; 12496de40e93SBarry Smith const PetscScalar *x; 1250547795f9SHong Zhang 1251547795f9SHong Zhang PetscFunctionBegin; 1252547795f9SHong Zhang ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1253e7e72b3dSBarry Smith if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1254547795f9SHong Zhang 1255547795f9SHong Zhang /* diagonal part */ 1256547795f9SHong Zhang ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1257547795f9SHong Zhang ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1258547795f9SHong Zhang 1259547795f9SHong Zhang /* subdiagonal part */ 1260547795f9SHong Zhang ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1261547795f9SHong Zhang 1262547795f9SHong Zhang /* copy x into the vec slvec0 */ 1263547795f9SHong Zhang ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 12646de40e93SBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1265547795f9SHong Zhang 1266547795f9SHong Zhang ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1267547795f9SHong Zhang ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 12686de40e93SBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1269547795f9SHong Zhang 1270547795f9SHong Zhang ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1271547795f9SHong Zhang ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1272547795f9SHong Zhang /* supperdiagonal part */ 1273547795f9SHong Zhang ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1274547795f9SHong Zhang PetscFunctionReturn(0); 1275547795f9SHong Zhang } 1276547795f9SHong Zhang 1277dfbe8321SBarry Smith PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 1278a9d4b620SHong Zhang { 1279a9d4b620SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1280dfbe8321SBarry Smith PetscErrorCode ierr; 1281d0f46423SBarry Smith PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 1282d9ca1df4SBarry Smith PetscScalar *from; 1283d9ca1df4SBarry Smith const PetscScalar *x; 1284a9d4b620SHong Zhang 1285a9d4b620SHong Zhang PetscFunctionBegin; 1286a9d4b620SHong Zhang ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1287e7e72b3dSBarry Smith if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1288a9d4b620SHong Zhang 1289a9d4b620SHong Zhang /* diagonal part */ 1290a9d4b620SHong Zhang ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1291fa22f6d0SBarry Smith ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1292a9d4b620SHong Zhang 1293a9d4b620SHong Zhang /* subdiagonal part */ 1294a9d4b620SHong Zhang ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1295fc165ae2SBarry Smith 1296a9d4b620SHong Zhang /* copy x into the vec slvec0 */ 12971ebc52fbSHong Zhang ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1298d9ca1df4SBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1299a9d4b620SHong Zhang 1300fc165ae2SBarry Smith ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 1301fc165ae2SBarry Smith ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1302d9ca1df4SBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1303fc165ae2SBarry Smith 1304fc165ae2SBarry Smith ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1305ca9f406cSSatish Balay ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1306a9d4b620SHong Zhang /* supperdiagonal part */ 1307a9d4b620SHong Zhang ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1308a9d4b620SHong Zhang PetscFunctionReturn(0); 1309a9d4b620SHong Zhang } 1310a9d4b620SHong Zhang 1311dfbe8321SBarry Smith PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 1312a30f8f8cSSatish Balay { 1313a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1314dfbe8321SBarry Smith PetscErrorCode ierr; 13151302d50aSBarry Smith PetscInt nt; 1316a30f8f8cSSatish Balay 1317a30f8f8cSSatish Balay PetscFunctionBegin; 1318a30f8f8cSSatish Balay ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1319e7e72b3dSBarry Smith if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1320e7e72b3dSBarry Smith 1321a30f8f8cSSatish Balay ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1322e7e72b3dSBarry Smith if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 132365d70643SHong Zhang 1324ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1325b941877fSHong Zhang /* do diagonal part */ 1326b941877fSHong Zhang ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1327b941877fSHong Zhang /* do supperdiagonal part */ 1328ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1329b941877fSHong Zhang ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1330b941877fSHong Zhang /* do subdiagonal part */ 1331b941877fSHong Zhang ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1332ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1333ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1334a30f8f8cSSatish Balay PetscFunctionReturn(0); 1335a30f8f8cSSatish Balay } 1336a30f8f8cSSatish Balay 1337dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1338a30f8f8cSSatish Balay { 1339de8b6608SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1340dfbe8321SBarry Smith PetscErrorCode ierr; 1341d0f46423SBarry Smith PetscInt mbs=a->mbs,bs=A->rmap->bs; 1342d9ca1df4SBarry Smith PetscScalar *from,zero=0.0; 1343d9ca1df4SBarry Smith const PetscScalar *x; 1344a9d4b620SHong Zhang 1345a9d4b620SHong Zhang PetscFunctionBegin; 1346a9d4b620SHong Zhang /* 1347ce94432eSBarry Smith PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n"); 13480ec8b6e3SBarry Smith PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT); 1349a9d4b620SHong Zhang */ 1350a9d4b620SHong Zhang /* diagonal part */ 1351a9d4b620SHong Zhang ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 13522dcb1b2aSMatthew Knepley ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 1353a9d4b620SHong Zhang 1354a9d4b620SHong Zhang /* subdiagonal part */ 1355a9d4b620SHong Zhang ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1356a9d4b620SHong Zhang 1357a9d4b620SHong Zhang /* copy x into the vec slvec0 */ 13581ebc52fbSHong Zhang ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1359d9ca1df4SBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1360a9d4b620SHong Zhang ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 13611ebc52fbSHong Zhang ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1362a9d4b620SHong Zhang 1363ca9f406cSSatish Balay ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1364d9ca1df4SBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1365ca9f406cSSatish Balay ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1366a9d4b620SHong Zhang 1367a9d4b620SHong Zhang /* supperdiagonal part */ 1368a9d4b620SHong Zhang ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1369a9d4b620SHong Zhang PetscFunctionReturn(0); 1370a9d4b620SHong Zhang } 1371a9d4b620SHong Zhang 1372dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1373a9d4b620SHong Zhang { 1374a9d4b620SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1375dfbe8321SBarry Smith PetscErrorCode ierr; 1376a30f8f8cSSatish Balay 1377a30f8f8cSSatish Balay PetscFunctionBegin; 1378ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1379b941877fSHong Zhang /* do diagonal part */ 1380b941877fSHong Zhang ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1381b941877fSHong Zhang /* do supperdiagonal part */ 1382ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1383de8b6608SHong Zhang ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1384de8b6608SHong Zhang 1385b941877fSHong Zhang /* do subdiagonal part */ 1386a30f8f8cSSatish Balay ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1387ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1388ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1389a30f8f8cSSatish Balay PetscFunctionReturn(0); 1390a30f8f8cSSatish Balay } 1391a30f8f8cSSatish Balay 1392a30f8f8cSSatish Balay /* 1393a30f8f8cSSatish Balay This only works correctly for square matrices where the subblock A->A is the 1394a30f8f8cSSatish Balay diagonal block 1395a30f8f8cSSatish Balay */ 1396dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1397a30f8f8cSSatish Balay { 1398a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1399dfbe8321SBarry Smith PetscErrorCode ierr; 1400a30f8f8cSSatish Balay 1401a30f8f8cSSatish Balay PetscFunctionBegin; 1402e32f2f54SBarry Smith /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1403a30f8f8cSSatish Balay ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1404a30f8f8cSSatish Balay PetscFunctionReturn(0); 1405a30f8f8cSSatish Balay } 1406a30f8f8cSSatish Balay 1407f4df32b1SMatthew Knepley PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1408a30f8f8cSSatish Balay { 1409a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1410dfbe8321SBarry Smith PetscErrorCode ierr; 1411a30f8f8cSSatish Balay 1412a30f8f8cSSatish Balay PetscFunctionBegin; 1413f4df32b1SMatthew Knepley ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1414f4df32b1SMatthew Knepley ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1415a30f8f8cSSatish Balay PetscFunctionReturn(0); 1416a30f8f8cSSatish Balay } 1417a30f8f8cSSatish Balay 14181302d50aSBarry Smith PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1419a30f8f8cSSatish Balay { 1420d0d4cfc2SHong Zhang Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1421d0d4cfc2SHong Zhang PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1422d0d4cfc2SHong Zhang PetscErrorCode ierr; 1423d0f46423SBarry Smith PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1424d0f46423SBarry Smith PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1425899cda47SBarry Smith PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1426d0d4cfc2SHong Zhang 1427a30f8f8cSSatish Balay PetscFunctionBegin; 1428e32f2f54SBarry Smith if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1429d0d4cfc2SHong Zhang mat->getrowactive = PETSC_TRUE; 1430d0d4cfc2SHong Zhang 1431d0d4cfc2SHong Zhang if (!mat->rowvalues && (idx || v)) { 1432d0d4cfc2SHong Zhang /* 1433d0d4cfc2SHong Zhang allocate enough space to hold information from the longest row. 1434d0d4cfc2SHong Zhang */ 1435d0d4cfc2SHong Zhang Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1436d0d4cfc2SHong Zhang Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1437d0d4cfc2SHong Zhang PetscInt max = 1,mbs = mat->mbs,tmp; 1438d0d4cfc2SHong Zhang for (i=0; i<mbs; i++) { 1439d0d4cfc2SHong Zhang tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 144026fbe8dcSKarl Rupp if (max < tmp) max = tmp; 1441d0d4cfc2SHong Zhang } 1442dcca6d9dSJed Brown ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1443d0d4cfc2SHong Zhang } 1444d0d4cfc2SHong Zhang 1445e7e72b3dSBarry Smith if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1446d0d4cfc2SHong Zhang lrow = row - brstart; /* local row index */ 1447d0d4cfc2SHong Zhang 1448d0d4cfc2SHong Zhang pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1449d0d4cfc2SHong Zhang if (!v) {pvA = 0; pvB = 0;} 1450d0d4cfc2SHong Zhang if (!idx) {pcA = 0; if (!v) pcB = 0;} 1451d0d4cfc2SHong Zhang ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1452d0d4cfc2SHong Zhang ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1453d0d4cfc2SHong Zhang nztot = nzA + nzB; 1454d0d4cfc2SHong Zhang 1455d0d4cfc2SHong Zhang cmap = mat->garray; 1456d0d4cfc2SHong Zhang if (v || idx) { 1457d0d4cfc2SHong Zhang if (nztot) { 1458d0d4cfc2SHong Zhang /* Sort by increasing column numbers, assuming A and B already sorted */ 1459d0d4cfc2SHong Zhang PetscInt imark = -1; 1460d0d4cfc2SHong Zhang if (v) { 1461d0d4cfc2SHong Zhang *v = v_p = mat->rowvalues; 1462d0d4cfc2SHong Zhang for (i=0; i<nzB; i++) { 1463d0d4cfc2SHong Zhang if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1464d0d4cfc2SHong Zhang else break; 1465d0d4cfc2SHong Zhang } 1466d0d4cfc2SHong Zhang imark = i; 1467d0d4cfc2SHong Zhang for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1468d0d4cfc2SHong Zhang for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1469d0d4cfc2SHong Zhang } 1470d0d4cfc2SHong Zhang if (idx) { 1471d0d4cfc2SHong Zhang *idx = idx_p = mat->rowindices; 1472d0d4cfc2SHong Zhang if (imark > -1) { 1473d0d4cfc2SHong Zhang for (i=0; i<imark; i++) { 1474d0d4cfc2SHong Zhang idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1475d0d4cfc2SHong Zhang } 1476d0d4cfc2SHong Zhang } else { 1477d0d4cfc2SHong Zhang for (i=0; i<nzB; i++) { 147826fbe8dcSKarl Rupp if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1479d0d4cfc2SHong Zhang else break; 1480d0d4cfc2SHong Zhang } 1481d0d4cfc2SHong Zhang imark = i; 1482d0d4cfc2SHong Zhang } 1483d0d4cfc2SHong Zhang for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1484d0d4cfc2SHong Zhang for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1485d0d4cfc2SHong Zhang } 1486d0d4cfc2SHong Zhang } else { 1487d0d4cfc2SHong Zhang if (idx) *idx = 0; 1488d0d4cfc2SHong Zhang if (v) *v = 0; 1489d0d4cfc2SHong Zhang } 1490d0d4cfc2SHong Zhang } 1491d0d4cfc2SHong Zhang *nz = nztot; 1492d0d4cfc2SHong Zhang ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1493d0d4cfc2SHong Zhang ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1494a30f8f8cSSatish Balay PetscFunctionReturn(0); 1495a30f8f8cSSatish Balay } 1496a30f8f8cSSatish Balay 14971302d50aSBarry Smith PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1498a30f8f8cSSatish Balay { 1499a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1500a30f8f8cSSatish Balay 1501a30f8f8cSSatish Balay PetscFunctionBegin; 1502e7e72b3dSBarry Smith if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1503a30f8f8cSSatish Balay baij->getrowactive = PETSC_FALSE; 1504a30f8f8cSSatish Balay PetscFunctionReturn(0); 1505a30f8f8cSSatish Balay } 1506a30f8f8cSSatish Balay 1507d0d4cfc2SHong Zhang PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1508d0d4cfc2SHong Zhang { 1509d0d4cfc2SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1510d0d4cfc2SHong Zhang Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1511d0d4cfc2SHong Zhang 1512d0d4cfc2SHong Zhang PetscFunctionBegin; 1513d0d4cfc2SHong Zhang aA->getrow_utriangular = PETSC_TRUE; 1514d0d4cfc2SHong Zhang PetscFunctionReturn(0); 1515d0d4cfc2SHong Zhang } 1516d0d4cfc2SHong Zhang PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1517d0d4cfc2SHong Zhang { 1518d0d4cfc2SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1519d0d4cfc2SHong Zhang Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1520d0d4cfc2SHong Zhang 1521d0d4cfc2SHong Zhang PetscFunctionBegin; 1522d0d4cfc2SHong Zhang aA->getrow_utriangular = PETSC_FALSE; 1523d0d4cfc2SHong Zhang PetscFunctionReturn(0); 1524d0d4cfc2SHong Zhang } 1525d0d4cfc2SHong Zhang 152699cafbc1SBarry Smith PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 152799cafbc1SBarry Smith { 152899cafbc1SBarry Smith Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 152999cafbc1SBarry Smith PetscErrorCode ierr; 153099cafbc1SBarry Smith 153199cafbc1SBarry Smith PetscFunctionBegin; 153299cafbc1SBarry Smith ierr = MatRealPart(a->A);CHKERRQ(ierr); 153399cafbc1SBarry Smith ierr = MatRealPart(a->B);CHKERRQ(ierr); 153499cafbc1SBarry Smith PetscFunctionReturn(0); 153599cafbc1SBarry Smith } 153699cafbc1SBarry Smith 153799cafbc1SBarry Smith PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 153899cafbc1SBarry Smith { 153999cafbc1SBarry Smith Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 154099cafbc1SBarry Smith PetscErrorCode ierr; 154199cafbc1SBarry Smith 154299cafbc1SBarry Smith PetscFunctionBegin; 154399cafbc1SBarry Smith ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 154499cafbc1SBarry Smith ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 154599cafbc1SBarry Smith PetscFunctionReturn(0); 154699cafbc1SBarry Smith } 154799cafbc1SBarry Smith 15487dae84e0SHong Zhang /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ() 154936032a97SHong Zhang Input: isrow - distributed(parallel), 155036032a97SHong Zhang iscol_local - locally owned (seq) 155136032a97SHong Zhang */ 155236032a97SHong Zhang PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg) 15538f46ffcaSHong Zhang { 15548f46ffcaSHong Zhang PetscErrorCode ierr; 15558f46ffcaSHong Zhang PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch; 15568f46ffcaSHong Zhang const PetscInt *ptr1,*ptr2; 155736032a97SHong Zhang 155836032a97SHong Zhang PetscFunctionBegin; 15598f46ffcaSHong Zhang ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr); 15608f46ffcaSHong Zhang ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr); 15611098a8e8SHong Zhang if (sz1 > sz2) { 15621098a8e8SHong Zhang *flg = PETSC_FALSE; 15631098a8e8SHong Zhang PetscFunctionReturn(0); 15641098a8e8SHong Zhang } 15658f46ffcaSHong Zhang 15668f46ffcaSHong Zhang ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr); 15678f46ffcaSHong Zhang ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr); 15688f46ffcaSHong Zhang 15698f46ffcaSHong Zhang ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr); 15708f46ffcaSHong Zhang ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr); 15718f46ffcaSHong Zhang ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr); 15728f46ffcaSHong Zhang ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr); 15738f46ffcaSHong Zhang ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr); 15748f46ffcaSHong Zhang ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr); 15758f46ffcaSHong Zhang 15768f46ffcaSHong Zhang nmatch=0; 15778f46ffcaSHong Zhang k = 0; 15788f46ffcaSHong Zhang for (i=0; i<sz1; i++){ 15798f46ffcaSHong Zhang for (j=k; j<sz2; j++){ 15808f46ffcaSHong Zhang if (a1[i] == a2[j]) { 15818f46ffcaSHong Zhang k = j; nmatch++; 15828f46ffcaSHong Zhang break; 15838f46ffcaSHong Zhang } 15848f46ffcaSHong Zhang } 15858f46ffcaSHong Zhang } 15868f46ffcaSHong Zhang ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr); 15878f46ffcaSHong Zhang ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr); 15888f46ffcaSHong Zhang ierr = PetscFree(a1);CHKERRQ(ierr); 15898f46ffcaSHong Zhang ierr = PetscFree(a2);CHKERRQ(ierr); 15901098a8e8SHong Zhang if (nmatch < sz1) { 15911098a8e8SHong Zhang *flg = PETSC_FALSE; 15921098a8e8SHong Zhang } else { 15931098a8e8SHong Zhang *flg = PETSC_TRUE; 15941098a8e8SHong Zhang } 159536032a97SHong Zhang PetscFunctionReturn(0); 15968f46ffcaSHong Zhang } 159736032a97SHong Zhang 15987dae84e0SHong Zhang PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 159936032a97SHong Zhang { 160036032a97SHong Zhang PetscErrorCode ierr; 160136032a97SHong Zhang IS iscol_local; 160236032a97SHong Zhang PetscInt csize; 160336032a97SHong Zhang PetscBool isequal; 160436032a97SHong Zhang 160536032a97SHong Zhang PetscFunctionBegin; 160636032a97SHong Zhang ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 160736032a97SHong Zhang if (call == MAT_REUSE_MATRIX) { 160836032a97SHong Zhang ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 160936032a97SHong Zhang if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 161036032a97SHong Zhang } else { 161136032a97SHong Zhang ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 161236032a97SHong Zhang ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr); 161336032a97SHong Zhang if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow"); 16148f46ffcaSHong Zhang } 16158f46ffcaSHong Zhang 16167dae84e0SHong Zhang /* now call MatCreateSubMatrix_MPIBAIJ() */ 16177dae84e0SHong Zhang ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 16188f46ffcaSHong Zhang if (call == MAT_INITIAL_MATRIX) { 16198f46ffcaSHong Zhang ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 16208f46ffcaSHong Zhang ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 16218f46ffcaSHong Zhang } 16228f46ffcaSHong Zhang PetscFunctionReturn(0); 16238f46ffcaSHong Zhang } 16248f46ffcaSHong Zhang 1625dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1626a30f8f8cSSatish Balay { 1627a30f8f8cSSatish Balay Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1628dfbe8321SBarry Smith PetscErrorCode ierr; 1629a30f8f8cSSatish Balay 1630a30f8f8cSSatish Balay PetscFunctionBegin; 1631a30f8f8cSSatish Balay ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1632a30f8f8cSSatish Balay ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1633a30f8f8cSSatish Balay PetscFunctionReturn(0); 1634a30f8f8cSSatish Balay } 1635a30f8f8cSSatish Balay 1636dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1637a30f8f8cSSatish Balay { 1638a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1639a30f8f8cSSatish Balay Mat A = a->A,B = a->B; 1640dfbe8321SBarry Smith PetscErrorCode ierr; 1641a30f8f8cSSatish Balay PetscReal isend[5],irecv[5]; 1642a30f8f8cSSatish Balay 1643a30f8f8cSSatish Balay PetscFunctionBegin; 1644d0f46423SBarry Smith info->block_size = (PetscReal)matin->rmap->bs; 164526fbe8dcSKarl Rupp 1646a30f8f8cSSatish Balay ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 164726fbe8dcSKarl Rupp 1648a30f8f8cSSatish Balay isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1649a30f8f8cSSatish Balay isend[3] = info->memory; isend[4] = info->mallocs; 165026fbe8dcSKarl Rupp 1651a30f8f8cSSatish Balay ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 165226fbe8dcSKarl Rupp 1653a30f8f8cSSatish Balay isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1654a30f8f8cSSatish Balay isend[3] += info->memory; isend[4] += info->mallocs; 1655a30f8f8cSSatish Balay if (flag == MAT_LOCAL) { 1656a30f8f8cSSatish Balay info->nz_used = isend[0]; 1657a30f8f8cSSatish Balay info->nz_allocated = isend[1]; 1658a30f8f8cSSatish Balay info->nz_unneeded = isend[2]; 1659a30f8f8cSSatish Balay info->memory = isend[3]; 1660a30f8f8cSSatish Balay info->mallocs = isend[4]; 1661a30f8f8cSSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 1662b2566f29SBarry Smith ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 166326fbe8dcSKarl Rupp 1664a30f8f8cSSatish Balay info->nz_used = irecv[0]; 1665a30f8f8cSSatish Balay info->nz_allocated = irecv[1]; 1666a30f8f8cSSatish Balay info->nz_unneeded = irecv[2]; 1667a30f8f8cSSatish Balay info->memory = irecv[3]; 1668a30f8f8cSSatish Balay info->mallocs = irecv[4]; 1669a30f8f8cSSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1670b2566f29SBarry Smith ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 167126fbe8dcSKarl Rupp 1672a30f8f8cSSatish Balay info->nz_used = irecv[0]; 1673a30f8f8cSSatish Balay info->nz_allocated = irecv[1]; 1674a30f8f8cSSatish Balay info->nz_unneeded = irecv[2]; 1675a30f8f8cSSatish Balay info->memory = irecv[3]; 1676a30f8f8cSSatish Balay info->mallocs = irecv[4]; 1677f23aa3ddSBarry Smith } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1678a30f8f8cSSatish Balay info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1679a30f8f8cSSatish Balay info->fill_ratio_needed = 0; 1680a30f8f8cSSatish Balay info->factor_mallocs = 0; 1681a30f8f8cSSatish Balay PetscFunctionReturn(0); 1682a30f8f8cSSatish Balay } 1683a30f8f8cSSatish Balay 1684ace3abfcSBarry Smith PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1685a30f8f8cSSatish Balay { 1686a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1687d0d4cfc2SHong Zhang Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1688dfbe8321SBarry Smith PetscErrorCode ierr; 1689a30f8f8cSSatish Balay 1690a30f8f8cSSatish Balay PetscFunctionBegin; 1691e98b92d7SKris Buschelman switch (op) { 1692512a5fc5SBarry Smith case MAT_NEW_NONZERO_LOCATIONS: 1693e98b92d7SKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 169428b2fa4aSMatthew Knepley case MAT_UNUSED_NONZERO_LOCATION_ERR: 1695a9817697SBarry Smith case MAT_KEEP_NONZERO_PATTERN: 1696c10200c1SHong Zhang case MAT_SUBMAT_SINGLEIS: 1697e98b92d7SKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 169843674050SBarry Smith MatCheckPreallocated(A,1); 16994e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 17004e0d8c25SBarry Smith ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1701e98b92d7SKris Buschelman break; 1702e98b92d7SKris Buschelman case MAT_ROW_ORIENTED: 170343674050SBarry Smith MatCheckPreallocated(A,1); 17044e0d8c25SBarry Smith a->roworiented = flg; 170526fbe8dcSKarl Rupp 17064e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 17074e0d8c25SBarry Smith ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1708e98b92d7SKris Buschelman break; 17094e0d8c25SBarry Smith case MAT_NEW_DIAGONALS: 1710290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1711e98b92d7SKris Buschelman break; 1712e98b92d7SKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 17134e0d8c25SBarry Smith a->donotstash = flg; 1714e98b92d7SKris Buschelman break; 1715e98b92d7SKris Buschelman case MAT_USE_HASH_TABLE: 17164e0d8c25SBarry Smith a->ht_flag = flg; 1717e98b92d7SKris Buschelman break; 17189a4540c5SBarry Smith case MAT_HERMITIAN: 171943674050SBarry Smith MatCheckPreallocated(A,1); 1720e32f2f54SBarry Smith if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1721eeffb40dSHong Zhang ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 17220f2140c7SStefano Zampini #if defined(PETSC_USE_COMPLEX) 1723547795f9SHong Zhang A->ops->mult = MatMult_MPISBAIJ_Hermitian; 17240f2140c7SStefano Zampini #endif 1725eeffb40dSHong Zhang break; 1726ffa07934SHong Zhang case MAT_SPD: 1727ffa07934SHong Zhang A->spd_set = PETSC_TRUE; 1728ffa07934SHong Zhang A->spd = flg; 1729ffa07934SHong Zhang if (flg) { 1730ffa07934SHong Zhang A->symmetric = PETSC_TRUE; 1731ffa07934SHong Zhang A->structurally_symmetric = PETSC_TRUE; 1732ffa07934SHong Zhang A->symmetric_set = PETSC_TRUE; 1733ffa07934SHong Zhang A->structurally_symmetric_set = PETSC_TRUE; 1734ffa07934SHong Zhang } 1735ffa07934SHong Zhang break; 173677e54ba9SKris Buschelman case MAT_SYMMETRIC: 173743674050SBarry Smith MatCheckPreallocated(A,1); 1738eeffb40dSHong Zhang ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1739eeffb40dSHong Zhang break; 174077e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 174143674050SBarry Smith MatCheckPreallocated(A,1); 1742eeffb40dSHong Zhang ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1743eeffb40dSHong Zhang break; 17449a4540c5SBarry Smith case MAT_SYMMETRY_ETERNAL: 1745e32f2f54SBarry Smith if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1746290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 174777e54ba9SKris Buschelman break; 1748d0d4cfc2SHong Zhang case MAT_IGNORE_LOWER_TRIANGULAR: 17494e0d8c25SBarry Smith aA->ignore_ltriangular = flg; 1750d0d4cfc2SHong Zhang break; 1751d0d4cfc2SHong Zhang case MAT_ERROR_LOWER_TRIANGULAR: 17524e0d8c25SBarry Smith aA->ignore_ltriangular = flg; 1753d0d4cfc2SHong Zhang break; 1754d0d4cfc2SHong Zhang case MAT_GETROW_UPPERTRIANGULAR: 17554e0d8c25SBarry Smith aA->getrow_utriangular = flg; 1756d0d4cfc2SHong Zhang break; 1757e98b92d7SKris Buschelman default: 1758e32f2f54SBarry Smith SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1759a30f8f8cSSatish Balay } 1760a30f8f8cSSatish Balay PetscFunctionReturn(0); 1761a30f8f8cSSatish Balay } 1762a30f8f8cSSatish Balay 1763fc4dec0aSBarry Smith PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1764a30f8f8cSSatish Balay { 1765dfbe8321SBarry Smith PetscErrorCode ierr; 17666e111a19SKarl Rupp 1767a30f8f8cSSatish Balay PetscFunctionBegin; 1768cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX) { 1769999d9058SBarry Smith ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1770cf37664fSBarry Smith } else if (reuse == MAT_REUSE_MATRIX) { 1771cf37664fSBarry Smith ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1772fc4dec0aSBarry Smith } 17738115998fSBarry Smith PetscFunctionReturn(0); 1774a30f8f8cSSatish Balay } 1775a30f8f8cSSatish Balay 1776dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1777a30f8f8cSSatish Balay { 1778a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1779a30f8f8cSSatish Balay Mat a = baij->A, b=baij->B; 1780dfbe8321SBarry Smith PetscErrorCode ierr; 17815e90f9d9SHong Zhang PetscInt nv,m,n; 1782ace3abfcSBarry Smith PetscBool flg; 1783a30f8f8cSSatish Balay 1784a30f8f8cSSatish Balay PetscFunctionBegin; 1785a30f8f8cSSatish Balay if (ll != rr) { 1786b3bf805bSHong Zhang ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1787e7e72b3dSBarry Smith if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1788a30f8f8cSSatish Balay } 1789b3bf805bSHong Zhang if (!ll) PetscFunctionReturn(0); 1790b3bf805bSHong Zhang 17915e90f9d9SHong Zhang ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1792e32f2f54SBarry Smith if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1793b3bf805bSHong Zhang 17945e90f9d9SHong Zhang ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1795e32f2f54SBarry Smith if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 17965e90f9d9SHong Zhang 1797ca9f406cSSatish Balay ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 17985e90f9d9SHong Zhang 17995e90f9d9SHong Zhang /* left diagonalscale the off-diagonal part */ 18000298fd71SBarry Smith ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 18015e90f9d9SHong Zhang 18025e90f9d9SHong Zhang /* scale the diagonal part */ 1803a30f8f8cSSatish Balay ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1804a30f8f8cSSatish Balay 18055e90f9d9SHong Zhang /* right diagonalscale the off-diagonal part */ 1806ca9f406cSSatish Balay ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 18070298fd71SBarry Smith ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1808a30f8f8cSSatish Balay PetscFunctionReturn(0); 1809a30f8f8cSSatish Balay } 1810a30f8f8cSSatish Balay 1811dfbe8321SBarry Smith PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1812a30f8f8cSSatish Balay { 1813f3566a2aSHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1814dfbe8321SBarry Smith PetscErrorCode ierr; 1815a30f8f8cSSatish Balay 1816a30f8f8cSSatish Balay PetscFunctionBegin; 1817a30f8f8cSSatish Balay ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1818a30f8f8cSSatish Balay PetscFunctionReturn(0); 1819a30f8f8cSSatish Balay } 1820a30f8f8cSSatish Balay 18216849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1822a30f8f8cSSatish Balay 1823ace3abfcSBarry Smith PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1824a30f8f8cSSatish Balay { 1825a30f8f8cSSatish Balay Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1826a30f8f8cSSatish Balay Mat a,b,c,d; 1827ace3abfcSBarry Smith PetscBool flg; 1828dfbe8321SBarry Smith PetscErrorCode ierr; 1829a30f8f8cSSatish Balay 1830a30f8f8cSSatish Balay PetscFunctionBegin; 1831a30f8f8cSSatish Balay a = matA->A; b = matA->B; 1832a30f8f8cSSatish Balay c = matB->A; d = matB->B; 1833a30f8f8cSSatish Balay 1834a30f8f8cSSatish Balay ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1835abc0a331SBarry Smith if (flg) { 1836a30f8f8cSSatish Balay ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1837a30f8f8cSSatish Balay } 1838b2566f29SBarry Smith ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1839a30f8f8cSSatish Balay PetscFunctionReturn(0); 1840a30f8f8cSSatish Balay } 1841a30f8f8cSSatish Balay 18423c896bc6SHong Zhang PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 18433c896bc6SHong Zhang { 18443c896bc6SHong Zhang PetscErrorCode ierr; 18454c7a3774SStefano Zampini PetscBool isbaij; 18463c896bc6SHong Zhang 18473c896bc6SHong Zhang PetscFunctionBegin; 18484c7a3774SStefano Zampini ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr); 18494c7a3774SStefano Zampini if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name); 18503c896bc6SHong Zhang /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 18513c896bc6SHong Zhang if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1852d0d4cfc2SHong Zhang ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 18533c896bc6SHong Zhang ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1854d0d4cfc2SHong Zhang ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 18553c896bc6SHong Zhang } else { 18564c7a3774SStefano Zampini Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 18574c7a3774SStefano Zampini Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 18584c7a3774SStefano Zampini 18593c896bc6SHong Zhang ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 18603c896bc6SHong Zhang ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 18613c896bc6SHong Zhang } 1862cdc753b6SBarry Smith ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 18633c896bc6SHong Zhang PetscFunctionReturn(0); 18643c896bc6SHong Zhang } 18653c896bc6SHong Zhang 18664994cf47SJed Brown PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1867273d9f13SBarry Smith { 1868dfbe8321SBarry Smith PetscErrorCode ierr; 1869273d9f13SBarry Smith 1870273d9f13SBarry Smith PetscFunctionBegin; 1871535b19f3SBarry Smith ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1872273d9f13SBarry Smith PetscFunctionReturn(0); 1873273d9f13SBarry Smith } 1874a5e6ed63SBarry Smith 18754fe895cdSHong Zhang PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 18764fe895cdSHong Zhang { 18774fe895cdSHong Zhang PetscErrorCode ierr; 18784fe895cdSHong Zhang Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 18794fe895cdSHong Zhang PetscBLASInt bnz,one=1; 18804fe895cdSHong Zhang Mat_SeqSBAIJ *xa,*ya; 18814fe895cdSHong Zhang Mat_SeqBAIJ *xb,*yb; 18824fe895cdSHong Zhang 18834fe895cdSHong Zhang PetscFunctionBegin; 18844fe895cdSHong Zhang if (str == SAME_NONZERO_PATTERN) { 18854fe895cdSHong Zhang PetscScalar alpha = a; 18864fe895cdSHong Zhang xa = (Mat_SeqSBAIJ*)xx->A->data; 18874fe895cdSHong Zhang ya = (Mat_SeqSBAIJ*)yy->A->data; 1888c5df96a5SBarry Smith ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 18898b83055fSJed Brown PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 18904fe895cdSHong Zhang xb = (Mat_SeqBAIJ*)xx->B->data; 18914fe895cdSHong Zhang yb = (Mat_SeqBAIJ*)yy->B->data; 1892c5df96a5SBarry Smith ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 18938b83055fSJed Brown PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1894a3fa217bSJose E. Roman ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1895ab784542SHong Zhang } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1896ab784542SHong Zhang ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1897ab784542SHong Zhang ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1898ab784542SHong Zhang ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 18994fe895cdSHong Zhang } else { 19004de5dceeSHong Zhang Mat B; 19014de5dceeSHong Zhang PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 19024de5dceeSHong Zhang if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1903d0d4cfc2SHong Zhang ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 19044de5dceeSHong Zhang ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 19054de5dceeSHong Zhang ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 19064de5dceeSHong Zhang ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 19074de5dceeSHong Zhang ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 19084de5dceeSHong Zhang ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 19094de5dceeSHong Zhang ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 19104de5dceeSHong Zhang ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 19114de5dceeSHong Zhang ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 19124de5dceeSHong Zhang ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 19134de5dceeSHong Zhang ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 19144de5dceeSHong Zhang ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 19154de5dceeSHong Zhang ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 191628be2f97SBarry Smith ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 19174de5dceeSHong Zhang ierr = PetscFree(nnz_d);CHKERRQ(ierr); 19184de5dceeSHong Zhang ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1919d0d4cfc2SHong Zhang ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 19204de5dceeSHong Zhang ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 19214fe895cdSHong Zhang } 19224fe895cdSHong Zhang PetscFunctionReturn(0); 19234fe895cdSHong Zhang } 19244fe895cdSHong Zhang 19257dae84e0SHong Zhang PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1926a5e6ed63SBarry Smith { 19276849ba73SBarry Smith PetscErrorCode ierr; 19281302d50aSBarry Smith PetscInt i; 1929afebec48SHong Zhang PetscBool flg; 1930a5e6ed63SBarry Smith 19316849ba73SBarry Smith PetscFunctionBegin; 19327dae84e0SHong Zhang ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */ 1933a5e6ed63SBarry Smith for (i=0; i<n; i++) { 1934a5e6ed63SBarry Smith ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1935afebec48SHong Zhang if (!flg) { 1936b2fa50c1SHong Zhang ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr); 1937a5e6ed63SBarry Smith } 19384dcd73b1SHong Zhang } 1939a5e6ed63SBarry Smith PetscFunctionReturn(0); 1940a5e6ed63SBarry Smith } 1941a5e6ed63SBarry Smith 19427d68702bSBarry Smith PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a) 19437d68702bSBarry Smith { 19447d68702bSBarry Smith PetscErrorCode ierr; 19457d68702bSBarry Smith Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data; 19466f33a894SBarry Smith Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data; 19477d68702bSBarry Smith 19487d68702bSBarry Smith PetscFunctionBegin; 19496f33a894SBarry Smith if (!Y->preallocated) { 19507d68702bSBarry Smith ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 19516f33a894SBarry Smith } else if (!aij->nz) { 1952b83222d8SBarry Smith PetscInt nonew = aij->nonew; 19536f33a894SBarry Smith ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 1954b83222d8SBarry Smith aij->nonew = nonew; 19557d68702bSBarry Smith } 19567d68702bSBarry Smith ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 19577d68702bSBarry Smith PetscFunctionReturn(0); 19587d68702bSBarry Smith } 19597d68702bSBarry Smith 19603b49f96aSBarry Smith PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d) 19613b49f96aSBarry Smith { 19623b49f96aSBarry Smith Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 19633b49f96aSBarry Smith PetscErrorCode ierr; 19643b49f96aSBarry Smith 19653b49f96aSBarry Smith PetscFunctionBegin; 19663b49f96aSBarry Smith if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 19673b49f96aSBarry Smith ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 19683b49f96aSBarry Smith if (d) { 19693b49f96aSBarry Smith PetscInt rstart; 19703b49f96aSBarry Smith ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 19713b49f96aSBarry Smith *d += rstart/A->rmap->bs; 19723b49f96aSBarry Smith 19733b49f96aSBarry Smith } 19743b49f96aSBarry Smith PetscFunctionReturn(0); 19753b49f96aSBarry Smith } 19763b49f96aSBarry Smith 1977a5b7ff6bSBarry Smith PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1978a5b7ff6bSBarry Smith { 1979a5b7ff6bSBarry Smith PetscFunctionBegin; 1980a5b7ff6bSBarry Smith *a = ((Mat_MPISBAIJ*)A->data)->A; 1981a5b7ff6bSBarry Smith PetscFunctionReturn(0); 1982a5b7ff6bSBarry Smith } 19833b49f96aSBarry Smith 1984a30f8f8cSSatish Balay /* -------------------------------------------------------------------*/ 19853964eb88SJed Brown static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1986a30f8f8cSSatish Balay MatGetRow_MPISBAIJ, 1987a30f8f8cSSatish Balay MatRestoreRow_MPISBAIJ, 1988a9d4b620SHong Zhang MatMult_MPISBAIJ, 198997304618SKris Buschelman /* 4*/ MatMultAdd_MPISBAIJ, 1990431c96f7SBarry Smith MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1991431c96f7SBarry Smith MatMultAdd_MPISBAIJ, 1992a30f8f8cSSatish Balay 0, 1993a30f8f8cSSatish Balay 0, 1994a30f8f8cSSatish Balay 0, 199597304618SKris Buschelman /* 10*/ 0, 1996a30f8f8cSSatish Balay 0, 1997a30f8f8cSSatish Balay 0, 199841f059aeSBarry Smith MatSOR_MPISBAIJ, 1999a30f8f8cSSatish Balay MatTranspose_MPISBAIJ, 200097304618SKris Buschelman /* 15*/ MatGetInfo_MPISBAIJ, 2001a30f8f8cSSatish Balay MatEqual_MPISBAIJ, 2002a30f8f8cSSatish Balay MatGetDiagonal_MPISBAIJ, 2003a30f8f8cSSatish Balay MatDiagonalScale_MPISBAIJ, 2004a30f8f8cSSatish Balay MatNorm_MPISBAIJ, 200597304618SKris Buschelman /* 20*/ MatAssemblyBegin_MPISBAIJ, 2006a30f8f8cSSatish Balay MatAssemblyEnd_MPISBAIJ, 2007a30f8f8cSSatish Balay MatSetOption_MPISBAIJ, 2008a30f8f8cSSatish Balay MatZeroEntries_MPISBAIJ, 2009d519adbfSMatthew Knepley /* 24*/ 0, 2010a30f8f8cSSatish Balay 0, 2011a30f8f8cSSatish Balay 0, 2012a30f8f8cSSatish Balay 0, 2013a30f8f8cSSatish Balay 0, 20144994cf47SJed Brown /* 29*/ MatSetUp_MPISBAIJ, 2015b5df2d14SHong Zhang 0, 2016a30f8f8cSSatish Balay 0, 2017a5b7ff6bSBarry Smith MatGetDiagonalBlock_MPISBAIJ, 2018a30f8f8cSSatish Balay 0, 2019d519adbfSMatthew Knepley /* 34*/ MatDuplicate_MPISBAIJ, 2020a30f8f8cSSatish Balay 0, 2021a30f8f8cSSatish Balay 0, 2022a30f8f8cSSatish Balay 0, 2023a30f8f8cSSatish Balay 0, 2024d519adbfSMatthew Knepley /* 39*/ MatAXPY_MPISBAIJ, 20257dae84e0SHong Zhang MatCreateSubMatrices_MPISBAIJ, 2026d94109b8SHong Zhang MatIncreaseOverlap_MPISBAIJ, 2027a30f8f8cSSatish Balay MatGetValues_MPISBAIJ, 20283c896bc6SHong Zhang MatCopy_MPISBAIJ, 2029d519adbfSMatthew Knepley /* 44*/ 0, 2030a30f8f8cSSatish Balay MatScale_MPISBAIJ, 20317d68702bSBarry Smith MatShift_MPISBAIJ, 2032a30f8f8cSSatish Balay 0, 2033a30f8f8cSSatish Balay 0, 2034f73d5cc4SBarry Smith /* 49*/ 0, 2035a30f8f8cSSatish Balay 0, 2036a30f8f8cSSatish Balay 0, 2037a30f8f8cSSatish Balay 0, 2038a30f8f8cSSatish Balay 0, 2039d519adbfSMatthew Knepley /* 54*/ 0, 2040a30f8f8cSSatish Balay 0, 2041a30f8f8cSSatish Balay MatSetUnfactored_MPISBAIJ, 2042a30f8f8cSSatish Balay 0, 2043a30f8f8cSSatish Balay MatSetValuesBlocked_MPISBAIJ, 20447dae84e0SHong Zhang /* 59*/ MatCreateSubMatrix_MPISBAIJ, 2045a30f8f8cSSatish Balay 0, 2046a30f8f8cSSatish Balay 0, 2047357abbc8SBarry Smith 0, 204824d5174aSHong Zhang 0, 2049d519adbfSMatthew Knepley /* 64*/ 0, 205024d5174aSHong Zhang 0, 205124d5174aSHong Zhang 0, 205224d5174aSHong Zhang 0, 205324d5174aSHong Zhang 0, 2054d519adbfSMatthew Knepley /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 205524d5174aSHong Zhang 0, 205697304618SKris Buschelman 0, 205797304618SKris Buschelman 0, 205897304618SKris Buschelman 0, 2059d519adbfSMatthew Knepley /* 74*/ 0, 206097304618SKris Buschelman 0, 206197304618SKris Buschelman 0, 206297304618SKris Buschelman 0, 206397304618SKris Buschelman 0, 2064d519adbfSMatthew Knepley /* 79*/ 0, 206597304618SKris Buschelman 0, 206697304618SKris Buschelman 0, 206797304618SKris Buschelman 0, 20685bba2384SShri Abhyankar MatLoad_MPISBAIJ, 2069d519adbfSMatthew Knepley /* 84*/ 0, 2070865e5f61SKris Buschelman 0, 2071865e5f61SKris Buschelman 0, 2072865e5f61SKris Buschelman 0, 2073865e5f61SKris Buschelman 0, 2074d519adbfSMatthew Knepley /* 89*/ 0, 2075865e5f61SKris Buschelman 0, 2076865e5f61SKris Buschelman 0, 2077865e5f61SKris Buschelman 0, 2078865e5f61SKris Buschelman 0, 2079d519adbfSMatthew Knepley /* 94*/ 0, 2080865e5f61SKris Buschelman 0, 2081865e5f61SKris Buschelman 0, 208299cafbc1SBarry Smith 0, 208399cafbc1SBarry Smith 0, 2084d519adbfSMatthew Knepley /* 99*/ 0, 208599cafbc1SBarry Smith 0, 208699cafbc1SBarry Smith 0, 208799cafbc1SBarry Smith 0, 208899cafbc1SBarry Smith 0, 2089d519adbfSMatthew Knepley /*104*/ 0, 209099cafbc1SBarry Smith MatRealPart_MPISBAIJ, 2091d0d4cfc2SHong Zhang MatImaginaryPart_MPISBAIJ, 2092d0d4cfc2SHong Zhang MatGetRowUpperTriangular_MPISBAIJ, 209395936485SShri Abhyankar MatRestoreRowUpperTriangular_MPISBAIJ, 209495936485SShri Abhyankar /*109*/ 0, 209595936485SShri Abhyankar 0, 209695936485SShri Abhyankar 0, 209795936485SShri Abhyankar 0, 20983b49f96aSBarry Smith MatMissingDiagonal_MPISBAIJ, 209995936485SShri Abhyankar /*114*/ 0, 210095936485SShri Abhyankar 0, 210195936485SShri Abhyankar 0, 210295936485SShri Abhyankar 0, 210395936485SShri Abhyankar 0, 210495936485SShri Abhyankar /*119*/ 0, 210595936485SShri Abhyankar 0, 210695936485SShri Abhyankar 0, 21073964eb88SJed Brown 0, 21083964eb88SJed Brown 0, 21093964eb88SJed Brown /*124*/ 0, 21103964eb88SJed Brown 0, 21113964eb88SJed Brown 0, 21123964eb88SJed Brown 0, 21133964eb88SJed Brown 0, 21143964eb88SJed Brown /*129*/ 0, 21153964eb88SJed Brown 0, 21163964eb88SJed Brown 0, 21173964eb88SJed Brown 0, 21183964eb88SJed Brown 0, 21193964eb88SJed Brown /*134*/ 0, 21203964eb88SJed Brown 0, 21213964eb88SJed Brown 0, 21223964eb88SJed Brown 0, 21233964eb88SJed Brown 0, 212446533700Sstefano_zampini /*139*/ MatSetBlockSizes_Default, 2125f9426fe0SMark Adams 0, 212659f5e6ceSHong Zhang 0, 212759f5e6ceSHong Zhang 0, 212859f5e6ceSHong Zhang 0, 212959f5e6ceSHong Zhang /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 213099cafbc1SBarry Smith }; 2131a30f8f8cSSatish Balay 2132b2573a8aSBarry Smith PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2133a23d5eceSKris Buschelman { 2134a23d5eceSKris Buschelman Mat_MPISBAIJ *b; 2135dfbe8321SBarry Smith PetscErrorCode ierr; 2136535b19f3SBarry Smith PetscInt i,mbs,Mbs; 21375d2a9ed1SStefano Zampini PetscMPIInt size; 2138a23d5eceSKris Buschelman 2139a23d5eceSKris Buschelman PetscFunctionBegin; 214033d57670SJed Brown ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 214126283091SBarry Smith ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 214226283091SBarry Smith ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2143e02043d6SBarry Smith ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2144899cda47SBarry Smith 2145a23d5eceSKris Buschelman b = (Mat_MPISBAIJ*)B->data; 2146d0f46423SBarry Smith mbs = B->rmap->n/bs; 2147d0f46423SBarry Smith Mbs = B->rmap->N/bs; 2148c2fc9fa9SBarry Smith if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 2149a23d5eceSKris Buschelman 2150d0f46423SBarry Smith B->rmap->bs = bs; 2151a23d5eceSKris Buschelman b->bs2 = bs*bs; 2152a23d5eceSKris Buschelman b->mbs = mbs; 2153a23d5eceSKris Buschelman b->Mbs = Mbs; 2154de64b629SHong Zhang b->nbs = B->cmap->n/bs; 2155de64b629SHong Zhang b->Nbs = B->cmap->N/bs; 2156a23d5eceSKris Buschelman 2157a23d5eceSKris Buschelman for (i=0; i<=b->size; i++) { 2158d0f46423SBarry Smith b->rangebs[i] = B->rmap->range[i]/bs; 2159a23d5eceSKris Buschelman } 2160d0f46423SBarry Smith b->rstartbs = B->rmap->rstart/bs; 2161d0f46423SBarry Smith b->rendbs = B->rmap->rend/bs; 2162a23d5eceSKris Buschelman 2163d0f46423SBarry Smith b->cstartbs = B->cmap->rstart/bs; 2164d0f46423SBarry Smith b->cendbs = B->cmap->rend/bs; 2165a23d5eceSKris Buschelman 2166cb7b82ddSBarry Smith #if defined(PETSC_USE_CTABLE) 2167cb7b82ddSBarry Smith ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2168cb7b82ddSBarry Smith #else 2169cb7b82ddSBarry Smith ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2170cb7b82ddSBarry Smith #endif 2171cb7b82ddSBarry Smith ierr = PetscFree(b->garray);CHKERRQ(ierr); 2172cb7b82ddSBarry Smith ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2173cb7b82ddSBarry Smith ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2174cb7b82ddSBarry Smith ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr); 2175cb7b82ddSBarry Smith ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr); 2176cb7b82ddSBarry Smith ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr); 2177cb7b82ddSBarry Smith ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr); 2178cb7b82ddSBarry Smith ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr); 2179cb7b82ddSBarry Smith ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr); 2180cb7b82ddSBarry Smith 2181cb7b82ddSBarry Smith /* Because the B will have been resized we simply destroy it and create a new one each time */ 21825d2a9ed1SStefano Zampini ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2183cb7b82ddSBarry Smith ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2184cb7b82ddSBarry Smith ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 21855d2a9ed1SStefano Zampini ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr); 2186cb7b82ddSBarry Smith ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2187cb7b82ddSBarry Smith ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2188cb7b82ddSBarry Smith 2189526dfc15SBarry Smith if (!B->preallocated) { 2190f69a0ea3SMatthew Knepley ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2191d0f46423SBarry Smith ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 21929c097c71SKris Buschelman ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 21933bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2194ce94432eSBarry Smith ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2195526dfc15SBarry Smith } 2196a23d5eceSKris Buschelman 2197526dfc15SBarry Smith ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2198526dfc15SBarry Smith ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 219926fbe8dcSKarl Rupp 2200526dfc15SBarry Smith B->preallocated = PETSC_TRUE; 2201cb7b82ddSBarry Smith B->was_assembled = PETSC_FALSE; 2202cb7b82ddSBarry Smith B->assembled = PETSC_FALSE; 2203a23d5eceSKris Buschelman PetscFunctionReturn(0); 2204a23d5eceSKris Buschelman } 2205a23d5eceSKris Buschelman 2206dfb205c3SBarry Smith PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2207dfb205c3SBarry Smith { 2208dfb205c3SBarry Smith PetscInt m,rstart,cstart,cend; 2209dfb205c3SBarry Smith PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2210dfb205c3SBarry Smith const PetscInt *JJ =0; 2211dfb205c3SBarry Smith PetscScalar *values=0; 2212dfb205c3SBarry Smith PetscErrorCode ierr; 2213dfb205c3SBarry Smith 2214dfb205c3SBarry Smith PetscFunctionBegin; 2215ce94432eSBarry Smith if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2216dfb205c3SBarry Smith ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2217dfb205c3SBarry Smith ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2218dfb205c3SBarry Smith ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2219dfb205c3SBarry Smith ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2220e02043d6SBarry Smith ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2221dfb205c3SBarry Smith m = B->rmap->n/bs; 2222dfb205c3SBarry Smith rstart = B->rmap->rstart/bs; 2223dfb205c3SBarry Smith cstart = B->cmap->rstart/bs; 2224dfb205c3SBarry Smith cend = B->cmap->rend/bs; 2225dfb205c3SBarry Smith 2226dfb205c3SBarry Smith if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2227dcca6d9dSJed Brown ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2228dfb205c3SBarry Smith for (i=0; i<m; i++) { 2229dfb205c3SBarry Smith nz = ii[i+1] - ii[i]; 2230dfb205c3SBarry Smith if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2231dfb205c3SBarry Smith nz_max = PetscMax(nz_max,nz); 2232dfb205c3SBarry Smith JJ = jj + ii[i]; 2233dfb205c3SBarry Smith for (j=0; j<nz; j++) { 2234dfb205c3SBarry Smith if (*JJ >= cstart) break; 2235dfb205c3SBarry Smith JJ++; 2236dfb205c3SBarry Smith } 2237dfb205c3SBarry Smith d = 0; 2238dfb205c3SBarry Smith for (; j<nz; j++) { 2239dfb205c3SBarry Smith if (*JJ++ >= cend) break; 2240dfb205c3SBarry Smith d++; 2241dfb205c3SBarry Smith } 2242dfb205c3SBarry Smith d_nnz[i] = d; 2243dfb205c3SBarry Smith o_nnz[i] = nz - d; 2244dfb205c3SBarry Smith } 2245dfb205c3SBarry Smith ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2246dfb205c3SBarry Smith ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2247dfb205c3SBarry Smith 2248dfb205c3SBarry Smith values = (PetscScalar*)V; 2249dfb205c3SBarry Smith if (!values) { 2250785e854fSJed Brown ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2251dfb205c3SBarry Smith ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2252dfb205c3SBarry Smith } 2253dfb205c3SBarry Smith for (i=0; i<m; i++) { 2254dfb205c3SBarry Smith PetscInt row = i + rstart; 2255dfb205c3SBarry Smith PetscInt ncols = ii[i+1] - ii[i]; 2256dfb205c3SBarry Smith const PetscInt *icols = jj + ii[i]; 2257dfb205c3SBarry Smith const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2258dfb205c3SBarry Smith ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2259dfb205c3SBarry Smith } 2260dfb205c3SBarry Smith 2261dfb205c3SBarry Smith if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2262dfb205c3SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2263dfb205c3SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 22647827cd58SJed Brown ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2265dfb205c3SBarry Smith PetscFunctionReturn(0); 2266dfb205c3SBarry Smith } 2267dfb205c3SBarry Smith 22680bad9183SKris Buschelman /*MC 2269fafad747SKris Buschelman MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2270828413b8SBarry Smith based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2271828413b8SBarry Smith the matrix is stored. 2272828413b8SBarry Smith 2273828413b8SBarry Smith For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2274828413b8SBarry Smith can call MatSetOption(Mat, MAT_HERMITIAN); 22750bad9183SKris Buschelman 22760bad9183SKris Buschelman Options Database Keys: 22770bad9183SKris Buschelman . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 22780bad9183SKris Buschelman 22790bad9183SKris Buschelman Level: beginner 22800bad9183SKris Buschelman 22810bad9183SKris Buschelman .seealso: MatCreateMPISBAIJ 22820bad9183SKris Buschelman M*/ 22830bad9183SKris Buschelman 22848cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2285b5df2d14SHong Zhang { 2286b5df2d14SHong Zhang Mat_MPISBAIJ *b; 2287dfbe8321SBarry Smith PetscErrorCode ierr; 228894ae4db5SBarry Smith PetscBool flg = PETSC_FALSE; 2289b5df2d14SHong Zhang 2290b5df2d14SHong Zhang PetscFunctionBegin; 2291b00a9115SJed Brown ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2292b0a32e0cSBarry Smith B->data = (void*)b; 2293b5df2d14SHong Zhang ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2294b5df2d14SHong Zhang 2295b5df2d14SHong Zhang B->ops->destroy = MatDestroy_MPISBAIJ; 2296b5df2d14SHong Zhang B->ops->view = MatView_MPISBAIJ; 2297b5df2d14SHong Zhang B->assembled = PETSC_FALSE; 2298b5df2d14SHong Zhang B->insertmode = NOT_SET_VALUES; 229926fbe8dcSKarl Rupp 2300ce94432eSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2301ce94432eSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2302b5df2d14SHong Zhang 2303b5df2d14SHong Zhang /* build local table of row and column ownerships */ 2304854ce69bSBarry Smith ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 2305b5df2d14SHong Zhang 2306b5df2d14SHong Zhang /* build cache for off array entries formed */ 2307ce94432eSBarry Smith ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 230826fbe8dcSKarl Rupp 2309b5df2d14SHong Zhang b->donotstash = PETSC_FALSE; 23100298fd71SBarry Smith b->colmap = NULL; 23110298fd71SBarry Smith b->garray = NULL; 2312b5df2d14SHong Zhang b->roworiented = PETSC_TRUE; 2313b5df2d14SHong Zhang 2314b5df2d14SHong Zhang /* stuff used in block assembly */ 2315b5df2d14SHong Zhang b->barray = 0; 2316b5df2d14SHong Zhang 2317b5df2d14SHong Zhang /* stuff used for matrix vector multiply */ 2318b5df2d14SHong Zhang b->lvec = 0; 2319b5df2d14SHong Zhang b->Mvctx = 0; 232040781036SHong Zhang b->slvec0 = 0; 232140781036SHong Zhang b->slvec0b = 0; 232240781036SHong Zhang b->slvec1 = 0; 232340781036SHong Zhang b->slvec1a = 0; 232440781036SHong Zhang b->slvec1b = 0; 232540781036SHong Zhang b->sMvctx = 0; 2326b5df2d14SHong Zhang 2327b5df2d14SHong Zhang /* stuff for MatGetRow() */ 2328b5df2d14SHong Zhang b->rowindices = 0; 2329b5df2d14SHong Zhang b->rowvalues = 0; 2330b5df2d14SHong Zhang b->getrowactive = PETSC_FALSE; 2331b5df2d14SHong Zhang 2332b5df2d14SHong Zhang /* hash table stuff */ 2333b5df2d14SHong Zhang b->ht = 0; 2334b5df2d14SHong Zhang b->hd = 0; 2335b5df2d14SHong Zhang b->ht_size = 0; 2336b5df2d14SHong Zhang b->ht_flag = PETSC_FALSE; 2337b5df2d14SHong Zhang b->ht_fact = 0; 2338b5df2d14SHong Zhang b->ht_total_ct = 0; 2339b5df2d14SHong Zhang b->ht_insert_ct = 0; 2340b5df2d14SHong Zhang 23417dae84e0SHong Zhang /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 23427a868f3eSHong Zhang b->ijonly = PETSC_FALSE; 23437a868f3eSHong Zhang 234459ffdab8SBarry Smith b->in_loc = 0; 234559ffdab8SBarry Smith b->v_loc = 0; 234659ffdab8SBarry Smith b->n_loc = 0; 234794ae4db5SBarry Smith 2348bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 2349bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 2350bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 2351bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 23526214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 23536214f412SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 23546214f412SHong Zhang #endif 2355b147fbf3SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_XAIJ);CHKERRQ(ierr); 2356b147fbf3SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_XAIJ);CHKERRQ(ierr); 2357aa5a9175SDahai Guo 235823ce1328SBarry Smith B->symmetric = PETSC_TRUE; 235923ce1328SBarry Smith B->structurally_symmetric = PETSC_TRUE; 236023ce1328SBarry Smith B->symmetric_set = PETSC_TRUE; 236123ce1328SBarry Smith B->structurally_symmetric_set = PETSC_TRUE; 23629899f194SHong Zhang B->symmetric_eternal = PETSC_TRUE; 236326fbe8dcSKarl Rupp 236413647f61SHong Zhang B->hermitian = PETSC_FALSE; 236513647f61SHong Zhang B->hermitian_set = PETSC_FALSE; 236613647f61SHong Zhang 236717667f90SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 236894ae4db5SBarry Smith ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 236994ae4db5SBarry Smith ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 237094ae4db5SBarry Smith if (flg) { 237194ae4db5SBarry Smith PetscReal fact = 1.39; 237294ae4db5SBarry Smith ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 237394ae4db5SBarry Smith ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 237494ae4db5SBarry Smith if (fact <= 1.0) fact = 1.39; 237594ae4db5SBarry Smith ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 237694ae4db5SBarry Smith ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 237794ae4db5SBarry Smith } 237894ae4db5SBarry Smith ierr = PetscOptionsEnd();CHKERRQ(ierr); 2379b5df2d14SHong Zhang PetscFunctionReturn(0); 2380b5df2d14SHong Zhang } 2381b5df2d14SHong Zhang 2382209238afSKris Buschelman /*MC 2383002d173eSKris Buschelman MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2384209238afSKris Buschelman 2385209238afSKris Buschelman This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 2386209238afSKris Buschelman and MATMPISBAIJ otherwise. 2387209238afSKris Buschelman 2388209238afSKris Buschelman Options Database Keys: 2389209238afSKris Buschelman . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2390209238afSKris Buschelman 2391209238afSKris Buschelman Level: beginner 2392209238afSKris Buschelman 2393209238afSKris Buschelman .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 2394209238afSKris Buschelman M*/ 2395209238afSKris Buschelman 2396b5df2d14SHong Zhang /*@C 2397b5df2d14SHong Zhang MatMPISBAIJSetPreallocation - For good matrix assembly performance 2398b5df2d14SHong Zhang the user should preallocate the matrix storage by setting the parameters 2399b5df2d14SHong Zhang d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2400b5df2d14SHong Zhang performance can be increased by more than a factor of 50. 2401b5df2d14SHong Zhang 2402b5df2d14SHong Zhang Collective on Mat 2403b5df2d14SHong Zhang 2404b5df2d14SHong Zhang Input Parameters: 24051c4f3114SJed Brown + B - the matrix 2406bb7ae925SBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2407bb7ae925SBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2408b5df2d14SHong Zhang . d_nz - number of block nonzeros per block row in diagonal portion of local 2409b5df2d14SHong Zhang submatrix (same for all local rows) 2410b5df2d14SHong Zhang . d_nnz - array containing the number of block nonzeros in the various block rows 24116d10fdaeSSatish Balay in the upper triangular and diagonal part of the in diagonal portion of the local 24120298fd71SBarry Smith (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 241395742e49SBarry Smith for the diagonal entry and set a value even if it is zero. 2414b5df2d14SHong Zhang . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2415b5df2d14SHong Zhang submatrix (same for all local rows). 2416b5df2d14SHong Zhang - o_nnz - array containing the number of nonzeros in the various block rows of the 2417c2fc9fa9SBarry Smith off-diagonal portion of the local submatrix that is right of the diagonal 24180298fd71SBarry Smith (possibly different for each block row) or NULL. 2419b5df2d14SHong Zhang 2420b5df2d14SHong Zhang 2421b5df2d14SHong Zhang Options Database Keys: 2422*a2b725a8SWilliam Gropp + -mat_no_unroll - uses code that does not unroll the loops in the 2423b5df2d14SHong Zhang block calculations (much slower) 2424*a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use 2425b5df2d14SHong Zhang 2426b5df2d14SHong Zhang Notes: 2427b5df2d14SHong Zhang 2428b5df2d14SHong Zhang If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2429b5df2d14SHong Zhang than it must be used on all processors that share the object for that argument. 2430b5df2d14SHong Zhang 243149a6f317SBarry Smith If the *_nnz parameter is given then the *_nz parameter is ignored 243249a6f317SBarry Smith 2433b5df2d14SHong Zhang Storage Information: 2434b5df2d14SHong Zhang For a square global matrix we define each processor's diagonal portion 2435b5df2d14SHong Zhang to be its local rows and the corresponding columns (a square submatrix); 2436b5df2d14SHong Zhang each processor's off-diagonal portion encompasses the remainder of the 2437b5df2d14SHong Zhang local matrix (a rectangular submatrix). 2438b5df2d14SHong Zhang 2439b5df2d14SHong Zhang The user can specify preallocated storage for the diagonal part of 2440b5df2d14SHong Zhang the local submatrix with either d_nz or d_nnz (not both). Set 24410298fd71SBarry Smith d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2442b5df2d14SHong Zhang memory allocation. Likewise, specify preallocated storage for the 2443b5df2d14SHong Zhang off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2444b5df2d14SHong Zhang 2445aa95bbe8SBarry Smith You can call MatGetInfo() to get information on how effective the preallocation was; 2446aa95bbe8SBarry Smith for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2447aa95bbe8SBarry Smith You can also run with the option -info and look for messages with the string 2448aa95bbe8SBarry Smith malloc in them to see if additional memory allocation was needed. 2449aa95bbe8SBarry Smith 2450b5df2d14SHong Zhang Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2451b5df2d14SHong Zhang the figure below we depict these three local rows and all columns (0-11). 2452b5df2d14SHong Zhang 2453b5df2d14SHong Zhang .vb 2454b5df2d14SHong Zhang 0 1 2 3 4 5 6 7 8 9 10 11 2455a4b1a0f6SJed Brown -------------------------- 2456c2fc9fa9SBarry Smith row 3 |. . . d d d o o o o o o 2457c2fc9fa9SBarry Smith row 4 |. . . d d d o o o o o o 2458c2fc9fa9SBarry Smith row 5 |. . . d d d o o o o o o 2459a4b1a0f6SJed Brown -------------------------- 2460b5df2d14SHong Zhang .ve 2461b5df2d14SHong Zhang 2462b5df2d14SHong Zhang Thus, any entries in the d locations are stored in the d (diagonal) 2463b5df2d14SHong Zhang submatrix, and any entries in the o locations are stored in the 24646d10fdaeSSatish Balay o (off-diagonal) submatrix. Note that the d matrix is stored in 24656d10fdaeSSatish Balay MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2466b5df2d14SHong Zhang 24676d10fdaeSSatish Balay Now d_nz should indicate the number of block nonzeros per row in the upper triangular 24686d10fdaeSSatish Balay plus the diagonal part of the d matrix, 2469c2fc9fa9SBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix 2470c2fc9fa9SBarry Smith 2471b5df2d14SHong Zhang In general, for PDE problems in which most nonzeros are near the diagonal, 2472b5df2d14SHong Zhang one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2473b5df2d14SHong Zhang or you will get TERRIBLE performance; see the users' manual chapter on 2474b5df2d14SHong Zhang matrices. 2475b5df2d14SHong Zhang 2476b5df2d14SHong Zhang Level: intermediate 2477b5df2d14SHong Zhang 2478b5df2d14SHong Zhang .keywords: matrix, block, aij, compressed row, sparse, parallel 2479b5df2d14SHong Zhang 2480ab978733SBarry Smith .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 2481b5df2d14SHong Zhang @*/ 24827087cfbeSBarry Smith PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2483b5df2d14SHong Zhang { 24844ac538c5SBarry Smith PetscErrorCode ierr; 2485b5df2d14SHong Zhang 2486b5df2d14SHong Zhang PetscFunctionBegin; 24876ba663aaSJed Brown PetscValidHeaderSpecific(B,MAT_CLASSID,1); 24886ba663aaSJed Brown PetscValidType(B,1); 24896ba663aaSJed Brown PetscValidLogicalCollectiveInt(B,bs,2); 24904ac538c5SBarry Smith ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 2491b5df2d14SHong Zhang PetscFunctionReturn(0); 2492b5df2d14SHong Zhang } 2493b5df2d14SHong Zhang 2494a30f8f8cSSatish Balay /*@C 249569b1f4b7SBarry Smith MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2496a30f8f8cSSatish Balay (block compressed row). For good matrix assembly performance 2497a30f8f8cSSatish Balay the user should preallocate the matrix storage by setting the parameters 2498a30f8f8cSSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2499a30f8f8cSSatish Balay performance can be increased by more than a factor of 50. 2500a30f8f8cSSatish Balay 2501a30f8f8cSSatish Balay Collective on MPI_Comm 2502a30f8f8cSSatish Balay 2503a30f8f8cSSatish Balay Input Parameters: 2504a30f8f8cSSatish Balay + comm - MPI communicator 2505bb7ae925SBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2506bb7ae925SBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2507a30f8f8cSSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2508a30f8f8cSSatish Balay This value should be the same as the local size used in creating the 2509a30f8f8cSSatish Balay y vector for the matrix-vector product y = Ax. 2510a30f8f8cSSatish Balay . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2511a30f8f8cSSatish Balay This value should be the same as the local size used in creating the 2512a30f8f8cSSatish Balay x vector for the matrix-vector product y = Ax. 2513a30f8f8cSSatish Balay . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2514a30f8f8cSSatish Balay . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2515a30f8f8cSSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 2516a30f8f8cSSatish Balay submatrix (same for all local rows) 2517a30f8f8cSSatish Balay . d_nnz - array containing the number of block nonzeros in the various block rows 25186d10fdaeSSatish Balay in the upper triangular portion of the in diagonal portion of the local 25190298fd71SBarry Smith (possibly different for each block block row) or NULL. 252095742e49SBarry Smith If you plan to factor the matrix you must leave room for the diagonal entry and 252195742e49SBarry Smith set its value even if it is zero. 2522a30f8f8cSSatish Balay . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2523a30f8f8cSSatish Balay submatrix (same for all local rows). 2524a30f8f8cSSatish Balay - o_nnz - array containing the number of nonzeros in the various block rows of the 2525a30f8f8cSSatish Balay off-diagonal portion of the local submatrix (possibly different for 25260298fd71SBarry Smith each block row) or NULL. 2527a30f8f8cSSatish Balay 2528a30f8f8cSSatish Balay Output Parameter: 2529a30f8f8cSSatish Balay . A - the matrix 2530a30f8f8cSSatish Balay 2531a30f8f8cSSatish Balay Options Database Keys: 2532*a2b725a8SWilliam Gropp + -mat_no_unroll - uses code that does not unroll the loops in the 2533a30f8f8cSSatish Balay block calculations (much slower) 2534a30f8f8cSSatish Balay . -mat_block_size - size of the blocks to use 2535*a2b725a8SWilliam Gropp - -mat_mpi - use the parallel matrix data structures even on one processor 2536a30f8f8cSSatish Balay (defaults to using SeqBAIJ format on one processor) 2537a30f8f8cSSatish Balay 2538175b88e8SBarry Smith It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2539f6f02116SRichard Tran Mills MatXXXXSetPreallocation() paradigm instead of this routine directly. 2540175b88e8SBarry Smith [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2541175b88e8SBarry Smith 2542a30f8f8cSSatish Balay Notes: 2543d1be2dadSMatthew Knepley The number of rows and columns must be divisible by blocksize. 25446d6d819aSHong Zhang This matrix type does not support complex Hermitian operation. 2545d1be2dadSMatthew Knepley 2546a30f8f8cSSatish Balay The user MUST specify either the local or global matrix dimensions 2547a30f8f8cSSatish Balay (possibly both). 2548a30f8f8cSSatish Balay 2549a30f8f8cSSatish Balay If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2550a30f8f8cSSatish Balay than it must be used on all processors that share the object for that argument. 2551a30f8f8cSSatish Balay 255249a6f317SBarry Smith If the *_nnz parameter is given then the *_nz parameter is ignored 255349a6f317SBarry Smith 2554a30f8f8cSSatish Balay Storage Information: 2555a30f8f8cSSatish Balay For a square global matrix we define each processor's diagonal portion 2556a30f8f8cSSatish Balay to be its local rows and the corresponding columns (a square submatrix); 2557a30f8f8cSSatish Balay each processor's off-diagonal portion encompasses the remainder of the 2558a30f8f8cSSatish Balay local matrix (a rectangular submatrix). 2559a30f8f8cSSatish Balay 2560a30f8f8cSSatish Balay The user can specify preallocated storage for the diagonal part of 2561a30f8f8cSSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 25620298fd71SBarry Smith d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2563a30f8f8cSSatish Balay memory allocation. Likewise, specify preallocated storage for the 2564a30f8f8cSSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2565a30f8f8cSSatish Balay 2566a30f8f8cSSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2567a30f8f8cSSatish Balay the figure below we depict these three local rows and all columns (0-11). 2568a30f8f8cSSatish Balay 2569a30f8f8cSSatish Balay .vb 2570a30f8f8cSSatish Balay 0 1 2 3 4 5 6 7 8 9 10 11 2571a4b1a0f6SJed Brown -------------------------- 2572c2fc9fa9SBarry Smith row 3 |. . . d d d o o o o o o 2573c2fc9fa9SBarry Smith row 4 |. . . d d d o o o o o o 2574c2fc9fa9SBarry Smith row 5 |. . . d d d o o o o o o 2575a4b1a0f6SJed Brown -------------------------- 2576a30f8f8cSSatish Balay .ve 2577a30f8f8cSSatish Balay 2578a30f8f8cSSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 2579a30f8f8cSSatish Balay submatrix, and any entries in the o locations are stored in the 25806d10fdaeSSatish Balay o (off-diagonal) submatrix. Note that the d matrix is stored in 25816d10fdaeSSatish Balay MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2582a30f8f8cSSatish Balay 25836d10fdaeSSatish Balay Now d_nz should indicate the number of block nonzeros per row in the upper triangular 25846d10fdaeSSatish Balay plus the diagonal part of the d matrix, 2585a30f8f8cSSatish Balay and o_nz should indicate the number of block nonzeros per row in the o matrix. 2586a30f8f8cSSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 2587a30f8f8cSSatish Balay one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2588a30f8f8cSSatish Balay or you will get TERRIBLE performance; see the users' manual chapter on 2589a30f8f8cSSatish Balay matrices. 2590a30f8f8cSSatish Balay 2591a30f8f8cSSatish Balay Level: intermediate 2592a30f8f8cSSatish Balay 2593a30f8f8cSSatish Balay .keywords: matrix, block, aij, compressed row, sparse, parallel 2594a30f8f8cSSatish Balay 259569b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2596a30f8f8cSSatish Balay @*/ 2597a30f8f8cSSatish Balay 259869b1f4b7SBarry Smith PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2599a30f8f8cSSatish Balay { 26006849ba73SBarry Smith PetscErrorCode ierr; 26011302d50aSBarry Smith PetscMPIInt size; 2602a30f8f8cSSatish Balay 2603a30f8f8cSSatish Balay PetscFunctionBegin; 2604f69a0ea3SMatthew Knepley ierr = MatCreate(comm,A);CHKERRQ(ierr); 2605f69a0ea3SMatthew Knepley ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2606273d9f13SBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2607273d9f13SBarry Smith if (size > 1) { 2608b5df2d14SHong Zhang ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2609b5df2d14SHong Zhang ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2610273d9f13SBarry Smith } else { 2611273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2612273d9f13SBarry Smith ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2613273d9f13SBarry Smith } 2614a30f8f8cSSatish Balay PetscFunctionReturn(0); 2615a30f8f8cSSatish Balay } 2616a30f8f8cSSatish Balay 2617a30f8f8cSSatish Balay 26186849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2619a30f8f8cSSatish Balay { 2620a30f8f8cSSatish Balay Mat mat; 2621a30f8f8cSSatish Balay Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2622dfbe8321SBarry Smith PetscErrorCode ierr; 2623d0f46423SBarry Smith PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2624387bc808SHong Zhang PetscScalar *array; 2625a30f8f8cSSatish Balay 2626a30f8f8cSSatish Balay PetscFunctionBegin; 2627a30f8f8cSSatish Balay *newmat = 0; 262826fbe8dcSKarl Rupp 2629ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2630d0f46423SBarry Smith ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 26317adad957SLisandro Dalcin ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 26321e1e43feSBarry Smith ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 26331e1e43feSBarry Smith ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2634e1b6402fSHong Zhang 2635d5f3da31SBarry Smith mat->factortype = matin->factortype; 2636273d9f13SBarry Smith mat->preallocated = PETSC_TRUE; 263782327fa8SHong Zhang mat->assembled = PETSC_TRUE; 26387fff6886SHong Zhang mat->insertmode = NOT_SET_VALUES; 26397fff6886SHong Zhang 2640b5df2d14SHong Zhang a = (Mat_MPISBAIJ*)mat->data; 2641a30f8f8cSSatish Balay a->bs2 = oldmat->bs2; 2642a30f8f8cSSatish Balay a->mbs = oldmat->mbs; 2643a30f8f8cSSatish Balay a->nbs = oldmat->nbs; 2644a30f8f8cSSatish Balay a->Mbs = oldmat->Mbs; 2645a30f8f8cSSatish Balay a->Nbs = oldmat->Nbs; 2646a30f8f8cSSatish Balay 2647a30f8f8cSSatish Balay a->size = oldmat->size; 2648a30f8f8cSSatish Balay a->rank = oldmat->rank; 2649a30f8f8cSSatish Balay a->donotstash = oldmat->donotstash; 2650a30f8f8cSSatish Balay a->roworiented = oldmat->roworiented; 2651a30f8f8cSSatish Balay a->rowindices = 0; 2652a30f8f8cSSatish Balay a->rowvalues = 0; 2653a30f8f8cSSatish Balay a->getrowactive = PETSC_FALSE; 2654a30f8f8cSSatish Balay a->barray = 0; 2655899cda47SBarry Smith a->rstartbs = oldmat->rstartbs; 2656899cda47SBarry Smith a->rendbs = oldmat->rendbs; 2657899cda47SBarry Smith a->cstartbs = oldmat->cstartbs; 2658899cda47SBarry Smith a->cendbs = oldmat->cendbs; 2659a30f8f8cSSatish Balay 2660a30f8f8cSSatish Balay /* hash table stuff */ 2661a30f8f8cSSatish Balay a->ht = 0; 2662a30f8f8cSSatish Balay a->hd = 0; 2663a30f8f8cSSatish Balay a->ht_size = 0; 2664a30f8f8cSSatish Balay a->ht_flag = oldmat->ht_flag; 2665a30f8f8cSSatish Balay a->ht_fact = oldmat->ht_fact; 2666a30f8f8cSSatish Balay a->ht_total_ct = 0; 2667a30f8f8cSSatish Balay a->ht_insert_ct = 0; 2668a30f8f8cSSatish Balay 2669899cda47SBarry Smith ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2670a30f8f8cSSatish Balay if (oldmat->colmap) { 2671a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE) 2672a30f8f8cSSatish Balay ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2673a30f8f8cSSatish Balay #else 2674854ce69bSBarry Smith ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 26753bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 26761302d50aSBarry Smith ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2677a30f8f8cSSatish Balay #endif 2678a30f8f8cSSatish Balay } else a->colmap = 0; 2679387bc808SHong Zhang 2680a30f8f8cSSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2681785e854fSJed Brown ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 26823bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 26831302d50aSBarry Smith ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2684a30f8f8cSSatish Balay } else a->garray = 0; 2685a30f8f8cSSatish Balay 2686ce94432eSBarry Smith ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2687a30f8f8cSSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 26883bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2689a30f8f8cSSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 26903bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 269182327fa8SHong Zhang 269282327fa8SHong Zhang ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 26933bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 269482327fa8SHong Zhang ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 26953bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2696387bc808SHong Zhang 2697387bc808SHong Zhang ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 26981ebc52fbSHong Zhang ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2699778a2246SBarry Smith ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2700778a2246SBarry Smith ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 27011ebc52fbSHong Zhang ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 27021ebc52fbSHong Zhang ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2703778a2246SBarry Smith ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 27041ebc52fbSHong Zhang ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 27053bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 27063bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 27073bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 27083bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 27093bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2710387bc808SHong Zhang 2711387bc808SHong Zhang /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2712387bc808SHong Zhang ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2713387bc808SHong Zhang a->sMvctx = oldmat->sMvctx; 27143bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 271582327fa8SHong Zhang 2716a30f8f8cSSatish Balay ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 27173bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2718a30f8f8cSSatish Balay ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 27193bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2720140e18c1SBarry Smith ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2721a30f8f8cSSatish Balay *newmat = mat; 2722a30f8f8cSSatish Balay PetscFunctionReturn(0); 2723a30f8f8cSSatish Balay } 2724a30f8f8cSSatish Balay 2725112444f4SShri Abhyankar PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 272695936485SShri Abhyankar { 272795936485SShri Abhyankar PetscErrorCode ierr; 272895936485SShri Abhyankar PetscInt i,nz,j,rstart,rend; 272995936485SShri Abhyankar PetscScalar *vals,*buf; 2730ce94432eSBarry Smith MPI_Comm comm; 273195936485SShri Abhyankar MPI_Status status; 2732adcec1e5SJed Brown PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2733adcec1e5SJed Brown PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 273495936485SShri Abhyankar PetscInt *procsnz = 0,jj,*mycols,*ibuf; 27353059b6faSBarry Smith PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows; 273695936485SShri Abhyankar PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2737461878b2SBarry Smith PetscInt dcount,kmax,k,nzcount,tmp; 273895936485SShri Abhyankar int fd; 27397f489da9SVaclav Hapla PetscBool isbinary; 274095936485SShri Abhyankar 274195936485SShri Abhyankar PetscFunctionBegin; 27427f489da9SVaclav Hapla ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 27437f489da9SVaclav Hapla if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name); 27447f489da9SVaclav Hapla 2745c98fd787SBarry Smith /* force binary viewer to load .info file if it has not yet done so */ 2746c98fd787SBarry Smith ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2747ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 27480298fd71SBarry Smith ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 27490298fd71SBarry Smith ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 275095936485SShri Abhyankar ierr = PetscOptionsEnd();CHKERRQ(ierr); 27513059b6faSBarry Smith if (bs < 0) bs = 1; 275295936485SShri Abhyankar 275395936485SShri Abhyankar ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 275495936485SShri Abhyankar ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 275595936485SShri Abhyankar ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 27565872f025SBarry Smith if (!rank) { 275795936485SShri Abhyankar ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 275895936485SShri Abhyankar if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 275902f61395SBarry Smith if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 276095936485SShri Abhyankar } 276195936485SShri Abhyankar 276295936485SShri Abhyankar ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 276326fbe8dcSKarl Rupp M = header[1]; 276426fbe8dcSKarl Rupp N = header[2]; 276595936485SShri Abhyankar 276695936485SShri Abhyankar /* If global sizes are set, check if they are consistent with that given in the file */ 2767461878b2SBarry Smith if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M); 2768461878b2SBarry Smith if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N); 276995936485SShri Abhyankar 277095936485SShri Abhyankar if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 277195936485SShri Abhyankar 277295936485SShri Abhyankar /* 277395936485SShri Abhyankar This code adds extra rows to make sure the number of rows is 277495936485SShri Abhyankar divisible by the blocksize 277595936485SShri Abhyankar */ 277695936485SShri Abhyankar Mbs = M/bs; 277795936485SShri Abhyankar extra_rows = bs - M + bs*(Mbs); 277895936485SShri Abhyankar if (extra_rows == bs) extra_rows = 0; 277995936485SShri Abhyankar else Mbs++; 278095936485SShri Abhyankar if (extra_rows &&!rank) { 278195936485SShri Abhyankar ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 278295936485SShri Abhyankar } 278395936485SShri Abhyankar 278495936485SShri Abhyankar /* determine ownership of all rows */ 278595936485SShri Abhyankar if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 278695936485SShri Abhyankar mbs = Mbs/size + ((Mbs % size) > rank); 278795936485SShri Abhyankar m = mbs*bs; 278895936485SShri Abhyankar } else { /* User Set */ 278995936485SShri Abhyankar m = newmat->rmap->n; 279095936485SShri Abhyankar mbs = m/bs; 279195936485SShri Abhyankar } 2792dcca6d9dSJed Brown ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 27934dc2109aSBarry Smith ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 279495936485SShri Abhyankar ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 279595936485SShri Abhyankar rowners[0] = 0; 279695936485SShri Abhyankar for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 279795936485SShri Abhyankar for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 279895936485SShri Abhyankar rstart = rowners[rank]; 279995936485SShri Abhyankar rend = rowners[rank+1]; 280095936485SShri Abhyankar 280195936485SShri Abhyankar /* distribute row lengths to all processors */ 2802785e854fSJed Brown ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 280395936485SShri Abhyankar if (!rank) { 2804854ce69bSBarry Smith ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 280595936485SShri Abhyankar ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 280695936485SShri Abhyankar for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2807785e854fSJed Brown ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 280895936485SShri Abhyankar for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 280995936485SShri Abhyankar ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 281095936485SShri Abhyankar ierr = PetscFree(sndcounts);CHKERRQ(ierr); 281195936485SShri Abhyankar } else { 281295936485SShri Abhyankar ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 281395936485SShri Abhyankar } 281495936485SShri Abhyankar 281595936485SShri Abhyankar if (!rank) { /* procs[0] */ 281695936485SShri Abhyankar /* calculate the number of nonzeros on each processor */ 2817785e854fSJed Brown ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 281895936485SShri Abhyankar ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 281995936485SShri Abhyankar for (i=0; i<size; i++) { 282095936485SShri Abhyankar for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 282195936485SShri Abhyankar procsnz[i] += rowlengths[j]; 282295936485SShri Abhyankar } 282395936485SShri Abhyankar } 282495936485SShri Abhyankar ierr = PetscFree(rowlengths);CHKERRQ(ierr); 282595936485SShri Abhyankar 282695936485SShri Abhyankar /* determine max buffer needed and allocate it */ 282795936485SShri Abhyankar maxnz = 0; 282895936485SShri Abhyankar for (i=0; i<size; i++) { 282995936485SShri Abhyankar maxnz = PetscMax(maxnz,procsnz[i]); 283095936485SShri Abhyankar } 2831785e854fSJed Brown ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 283295936485SShri Abhyankar 283395936485SShri Abhyankar /* read in my part of the matrix column indices */ 283495936485SShri Abhyankar nz = procsnz[0]; 2835785e854fSJed Brown ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 283695936485SShri Abhyankar mycols = ibuf; 283795936485SShri Abhyankar if (size == 1) nz -= extra_rows; 283895936485SShri Abhyankar ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 283926fbe8dcSKarl Rupp if (size == 1) { 284026fbe8dcSKarl Rupp for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 284126fbe8dcSKarl Rupp } 284295936485SShri Abhyankar 284395936485SShri Abhyankar /* read in every ones (except the last) and ship off */ 284495936485SShri Abhyankar for (i=1; i<size-1; i++) { 284595936485SShri Abhyankar nz = procsnz[i]; 284695936485SShri Abhyankar ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 284795936485SShri Abhyankar ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 284895936485SShri Abhyankar } 284995936485SShri Abhyankar /* read in the stuff for the last proc */ 285095936485SShri Abhyankar if (size != 1) { 285195936485SShri Abhyankar nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 285295936485SShri Abhyankar ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 285395936485SShri Abhyankar for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 285495936485SShri Abhyankar ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 285595936485SShri Abhyankar } 285695936485SShri Abhyankar ierr = PetscFree(cols);CHKERRQ(ierr); 285795936485SShri Abhyankar } else { /* procs[i], i>0 */ 285895936485SShri Abhyankar /* determine buffer space needed for message */ 285995936485SShri Abhyankar nz = 0; 286026fbe8dcSKarl Rupp for (i=0; i<m; i++) nz += locrowlens[i]; 2861785e854fSJed Brown ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 286295936485SShri Abhyankar mycols = ibuf; 286395936485SShri Abhyankar /* receive message of column indices*/ 286495936485SShri Abhyankar ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 286595936485SShri Abhyankar ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 286695936485SShri Abhyankar if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 286795936485SShri Abhyankar } 286895936485SShri Abhyankar 286995936485SShri Abhyankar /* loop over local rows, determining number of off diagonal entries */ 2870dcca6d9dSJed Brown ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 2871dcca6d9dSJed Brown ierr = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 287295936485SShri Abhyankar ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 287395936485SShri Abhyankar ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 287495936485SShri Abhyankar ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 287595936485SShri Abhyankar rowcount = 0; 287695936485SShri Abhyankar nzcount = 0; 287795936485SShri Abhyankar for (i=0; i<mbs; i++) { 287895936485SShri Abhyankar dcount = 0; 287995936485SShri Abhyankar odcount = 0; 288095936485SShri Abhyankar for (j=0; j<bs; j++) { 288195936485SShri Abhyankar kmax = locrowlens[rowcount]; 288295936485SShri Abhyankar for (k=0; k<kmax; k++) { 288395936485SShri Abhyankar tmp = mycols[nzcount++]/bs; /* block col. index */ 288495936485SShri Abhyankar if (!mask[tmp]) { 288595936485SShri Abhyankar mask[tmp] = 1; 288695936485SShri Abhyankar if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 288795936485SShri Abhyankar else masked1[dcount++] = tmp; /* entry in diag portion */ 288895936485SShri Abhyankar } 288995936485SShri Abhyankar } 289095936485SShri Abhyankar rowcount++; 289195936485SShri Abhyankar } 289295936485SShri Abhyankar 289395936485SShri Abhyankar dlens[i] = dcount; /* d_nzz[i] */ 289495936485SShri Abhyankar odlens[i] = odcount; /* o_nzz[i] */ 289595936485SShri Abhyankar 289695936485SShri Abhyankar /* zero out the mask elements we set */ 289795936485SShri Abhyankar for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 289895936485SShri Abhyankar for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 289995936485SShri Abhyankar } 290095936485SShri Abhyankar ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 290195936485SShri Abhyankar ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 29026816a49fSBarry Smith ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 290395936485SShri Abhyankar 290495936485SShri Abhyankar if (!rank) { 2905785e854fSJed Brown ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 290695936485SShri Abhyankar /* read in my part of the matrix numerical values */ 290795936485SShri Abhyankar nz = procsnz[0]; 290895936485SShri Abhyankar vals = buf; 290995936485SShri Abhyankar mycols = ibuf; 291095936485SShri Abhyankar if (size == 1) nz -= extra_rows; 291195936485SShri Abhyankar ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 291226fbe8dcSKarl Rupp if (size == 1) { 291326fbe8dcSKarl Rupp for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 291426fbe8dcSKarl Rupp } 291595936485SShri Abhyankar 291695936485SShri Abhyankar /* insert into matrix */ 291795936485SShri Abhyankar jj = rstart*bs; 291895936485SShri Abhyankar for (i=0; i<m; i++) { 291995936485SShri Abhyankar ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 292095936485SShri Abhyankar mycols += locrowlens[i]; 292195936485SShri Abhyankar vals += locrowlens[i]; 292295936485SShri Abhyankar jj++; 292395936485SShri Abhyankar } 292495936485SShri Abhyankar 292595936485SShri Abhyankar /* read in other processors (except the last one) and ship out */ 292695936485SShri Abhyankar for (i=1; i<size-1; i++) { 292795936485SShri Abhyankar nz = procsnz[i]; 292895936485SShri Abhyankar vals = buf; 292995936485SShri Abhyankar ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 293095936485SShri Abhyankar ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 293195936485SShri Abhyankar } 293295936485SShri Abhyankar /* the last proc */ 293395936485SShri Abhyankar if (size != 1) { 293495936485SShri Abhyankar nz = procsnz[i] - extra_rows; 293595936485SShri Abhyankar vals = buf; 293695936485SShri Abhyankar ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 293795936485SShri Abhyankar for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 293895936485SShri Abhyankar ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 293995936485SShri Abhyankar } 294095936485SShri Abhyankar ierr = PetscFree(procsnz);CHKERRQ(ierr); 294195936485SShri Abhyankar 294295936485SShri Abhyankar } else { 294395936485SShri Abhyankar /* receive numeric values */ 2944785e854fSJed Brown ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 294595936485SShri Abhyankar 294695936485SShri Abhyankar /* receive message of values*/ 294795936485SShri Abhyankar vals = buf; 294895936485SShri Abhyankar mycols = ibuf; 294995936485SShri Abhyankar ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 295095936485SShri Abhyankar ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 295195936485SShri Abhyankar if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 295295936485SShri Abhyankar 295395936485SShri Abhyankar /* insert into matrix */ 295495936485SShri Abhyankar jj = rstart*bs; 295595936485SShri Abhyankar for (i=0; i<m; i++) { 295695936485SShri Abhyankar ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 295795936485SShri Abhyankar mycols += locrowlens[i]; 295895936485SShri Abhyankar vals += locrowlens[i]; 295995936485SShri Abhyankar jj++; 296095936485SShri Abhyankar } 296195936485SShri Abhyankar } 296295936485SShri Abhyankar 296395936485SShri Abhyankar ierr = PetscFree(locrowlens);CHKERRQ(ierr); 296495936485SShri Abhyankar ierr = PetscFree(buf);CHKERRQ(ierr); 296595936485SShri Abhyankar ierr = PetscFree(ibuf);CHKERRQ(ierr); 296695936485SShri Abhyankar ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 296795936485SShri Abhyankar ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 296895936485SShri Abhyankar ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 296995936485SShri Abhyankar ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 297095936485SShri Abhyankar ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 297195936485SShri Abhyankar PetscFunctionReturn(0); 297295936485SShri Abhyankar } 297395936485SShri Abhyankar 2974dcf5cc72SBarry Smith /*XXXXX@ 2975a30f8f8cSSatish Balay MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2976a30f8f8cSSatish Balay 2977a30f8f8cSSatish Balay Input Parameters: 2978a30f8f8cSSatish Balay . mat - the matrix 2979a30f8f8cSSatish Balay . fact - factor 2980a30f8f8cSSatish Balay 2981c5eb9154SBarry Smith Not Collective on Mat, each process can have a different hash factor 2982a30f8f8cSSatish Balay 2983a30f8f8cSSatish Balay Level: advanced 2984a30f8f8cSSatish Balay 2985a30f8f8cSSatish Balay Notes: 2986a30f8f8cSSatish Balay This can also be set by the command line option: -mat_use_hash_table fact 2987a30f8f8cSSatish Balay 2988a30f8f8cSSatish Balay .keywords: matrix, hashtable, factor, HT 2989a30f8f8cSSatish Balay 2990a30f8f8cSSatish Balay .seealso: MatSetOption() 2991dcf5cc72SBarry Smith @XXXXX*/ 2992dcf5cc72SBarry Smith 299324d5174aSHong Zhang 2994985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 299524d5174aSHong Zhang { 299624d5174aSHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2997f4c0e9e4SHong Zhang Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2998ca54ac64SHong Zhang PetscReal atmp; 299987828ca2SBarry Smith PetscReal *work,*svalues,*rvalues; 3000dfbe8321SBarry Smith PetscErrorCode ierr; 30011302d50aSBarry Smith PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 30021302d50aSBarry Smith PetscMPIInt rank,size; 30031302d50aSBarry Smith PetscInt *rowners_bs,dest,count,source; 300487828ca2SBarry Smith PetscScalar *va; 30058a1c53f2SBarry Smith MatScalar *ba; 3006f4c0e9e4SHong Zhang MPI_Status stat; 300724d5174aSHong Zhang 300824d5174aSHong Zhang PetscFunctionBegin; 3009e32f2f54SBarry Smith if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 30100298fd71SBarry Smith ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 30111ebc52fbSHong Zhang ierr = VecGetArray(v,&va);CHKERRQ(ierr); 3012f4c0e9e4SHong Zhang 3013ce94432eSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3014ce94432eSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3015f4c0e9e4SHong Zhang 3016d0f46423SBarry Smith bs = A->rmap->bs; 3017f4c0e9e4SHong Zhang mbs = a->mbs; 3018f4c0e9e4SHong Zhang Mbs = a->Mbs; 3019f4c0e9e4SHong Zhang ba = b->a; 3020f4c0e9e4SHong Zhang bi = b->i; 3021f4c0e9e4SHong Zhang bj = b->j; 3022f4c0e9e4SHong Zhang 3023f4c0e9e4SHong Zhang /* find ownerships */ 3024d0f46423SBarry Smith rowners_bs = A->rmap->range; 3025f4c0e9e4SHong Zhang 3026f4c0e9e4SHong Zhang /* each proc creates an array to be distributed */ 3027785e854fSJed Brown ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr); 3028ca54ac64SHong Zhang ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 3029f4c0e9e4SHong Zhang 3030f4c0e9e4SHong Zhang /* row_max for B */ 3031b8475685SHong Zhang if (rank != size-1) { 3032f4c0e9e4SHong Zhang for (i=0; i<mbs; i++) { 3033f4c0e9e4SHong Zhang ncols = bi[1] - bi[0]; bi++; 3034f4c0e9e4SHong Zhang brow = bs*i; 3035f4c0e9e4SHong Zhang for (j=0; j<ncols; j++) { 3036f4c0e9e4SHong Zhang bcol = bs*(*bj); 3037f4c0e9e4SHong Zhang for (kcol=0; kcol<bs; kcol++) { 3038ca54ac64SHong Zhang col = bcol + kcol; /* local col index */ 303904d41228SHong Zhang col += rowners_bs[rank+1]; /* global col index */ 3040f4c0e9e4SHong Zhang for (krow=0; krow<bs; krow++) { 3041f4c0e9e4SHong Zhang atmp = PetscAbsScalar(*ba); ba++; 3042ca54ac64SHong Zhang row = brow + krow; /* local row index */ 3043ca54ac64SHong Zhang if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 3044f4c0e9e4SHong Zhang if (work[col] < atmp) work[col] = atmp; 3045f4c0e9e4SHong Zhang } 3046f4c0e9e4SHong Zhang } 3047f4c0e9e4SHong Zhang bj++; 3048f4c0e9e4SHong Zhang } 3049f4c0e9e4SHong Zhang } 3050f4c0e9e4SHong Zhang 3051f4c0e9e4SHong Zhang /* send values to its owners */ 3052f4c0e9e4SHong Zhang for (dest=rank+1; dest<size; dest++) { 3053f4c0e9e4SHong Zhang svalues = work + rowners_bs[dest]; 3054ca54ac64SHong Zhang count = rowners_bs[dest+1]-rowners_bs[dest]; 3055ce94432eSBarry Smith ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 3056ca54ac64SHong Zhang } 3057f4c0e9e4SHong Zhang } 3058f4c0e9e4SHong Zhang 3059f4c0e9e4SHong Zhang /* receive values */ 3060ca54ac64SHong Zhang if (rank) { 3061f4c0e9e4SHong Zhang rvalues = work; 3062ca54ac64SHong Zhang count = rowners_bs[rank+1]-rowners_bs[rank]; 3063f4c0e9e4SHong Zhang for (source=0; source<rank; source++) { 3064ce94432eSBarry Smith ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 3065f4c0e9e4SHong Zhang /* process values */ 3066f4c0e9e4SHong Zhang for (i=0; i<count; i++) { 3067ca54ac64SHong Zhang if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 3068f4c0e9e4SHong Zhang } 3069f4c0e9e4SHong Zhang } 3070ca54ac64SHong Zhang } 3071f4c0e9e4SHong Zhang 30721ebc52fbSHong Zhang ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 3073ac355199SBarry Smith ierr = PetscFree(work);CHKERRQ(ierr); 307424d5174aSHong Zhang PetscFunctionReturn(0); 307524d5174aSHong Zhang } 30762798e883SHong Zhang 307741f059aeSBarry Smith PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 30782798e883SHong Zhang { 30792798e883SHong Zhang Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 3080dfbe8321SBarry Smith PetscErrorCode ierr; 3081d0f46423SBarry Smith PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 30823649974fSBarry Smith PetscScalar *x,*ptr,*from; 3083ffe4fb16SHong Zhang Vec bb1; 30843649974fSBarry Smith const PetscScalar *b; 3085ffe4fb16SHong Zhang 3086ffe4fb16SHong Zhang PetscFunctionBegin; 3087e32f2f54SBarry Smith if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 3088e32f2f54SBarry Smith if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 3089ffe4fb16SHong Zhang 3090a2b30743SBarry Smith if (flag == SOR_APPLY_UPPER) { 309141f059aeSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3092a2b30743SBarry Smith PetscFunctionReturn(0); 3093a2b30743SBarry Smith } 3094a2b30743SBarry Smith 3095ffe4fb16SHong Zhang if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 3096ffe4fb16SHong Zhang if (flag & SOR_ZERO_INITIAL_GUESS) { 309741f059aeSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 3098ffe4fb16SHong Zhang its--; 3099ffe4fb16SHong Zhang } 3100ffe4fb16SHong Zhang 3101ffe4fb16SHong Zhang ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 3102ffe4fb16SHong Zhang while (its--) { 3103ffe4fb16SHong Zhang 3104ffe4fb16SHong Zhang /* lower triangular part: slvec0b = - B^T*xx */ 3105ffe4fb16SHong Zhang ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 3106ffe4fb16SHong Zhang 3107ffe4fb16SHong Zhang /* copy xx into slvec0a */ 31081ebc52fbSHong Zhang ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 31091ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3110ffe4fb16SHong Zhang ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 31111ebc52fbSHong Zhang ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 3112ffe4fb16SHong Zhang 3113efb30889SBarry Smith ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 3114ffe4fb16SHong Zhang 3115ffe4fb16SHong Zhang /* copy bb into slvec1a */ 31161ebc52fbSHong Zhang ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 31173649974fSBarry Smith ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3118ffe4fb16SHong Zhang ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 31191ebc52fbSHong Zhang ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 3120ffe4fb16SHong Zhang 3121ffe4fb16SHong Zhang /* set slvec1b = 0 */ 3122fa22f6d0SBarry Smith ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 3123ffe4fb16SHong Zhang 3124ca9f406cSSatish Balay ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 31251ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3126a8b09249SBarry Smith ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3127ca9f406cSSatish Balay ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3128ffe4fb16SHong Zhang 3129ffe4fb16SHong Zhang /* upper triangular part: bb1 = bb1 - B*x */ 3130ffe4fb16SHong Zhang ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 3131ffe4fb16SHong Zhang 3132ffe4fb16SHong Zhang /* local diagonal sweep */ 313341f059aeSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 3134ffe4fb16SHong Zhang } 31356bf464f9SBarry Smith ierr = VecDestroy(&bb1);CHKERRQ(ierr); 3136fa22f6d0SBarry Smith } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 313741f059aeSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3138fa22f6d0SBarry Smith } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 313941f059aeSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 3140fa22f6d0SBarry Smith } else if (flag & SOR_EISENSTAT) { 3141fa22f6d0SBarry Smith Vec xx1; 3142ace3abfcSBarry Smith PetscBool hasop; 314320f1ed55SBarry Smith const PetscScalar *diag; 3144887ee2caSBarry Smith PetscScalar *sl,scale = (omega - 2.0)/omega; 314520f1ed55SBarry Smith PetscInt i,n; 3146fa22f6d0SBarry Smith 3147fa22f6d0SBarry Smith if (!mat->xx1) { 3148fa22f6d0SBarry Smith ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 3149fa22f6d0SBarry Smith ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 3150fa22f6d0SBarry Smith } 3151fa22f6d0SBarry Smith xx1 = mat->xx1; 3152fa22f6d0SBarry Smith bb1 = mat->bb1; 3153fa22f6d0SBarry Smith 315441f059aeSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 3155fa22f6d0SBarry Smith 3156fa22f6d0SBarry Smith if (!mat->diag) { 3157effcda25SBarry Smith /* this is wrong for same matrix with new nonzero values */ 31582a7a6963SBarry Smith ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 3159fa22f6d0SBarry Smith ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 3160fa22f6d0SBarry Smith } 3161fa22f6d0SBarry Smith ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 3162fa22f6d0SBarry Smith 3163fa22f6d0SBarry Smith if (hasop) { 3164fa22f6d0SBarry Smith ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 3165887ee2caSBarry Smith ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 316620f1ed55SBarry Smith } else { 316720f1ed55SBarry Smith /* 316820f1ed55SBarry Smith These two lines are replaced by code that may be a bit faster for a good compiler 316920f1ed55SBarry Smith ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 3170887ee2caSBarry Smith ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 317120f1ed55SBarry Smith */ 317220f1ed55SBarry Smith ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 31733649974fSBarry Smith ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 31743649974fSBarry Smith ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 317520f1ed55SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 317620f1ed55SBarry Smith ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 3177887ee2caSBarry Smith if (omega == 1.0) { 317826fbe8dcSKarl Rupp for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 317920f1ed55SBarry Smith ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 3180887ee2caSBarry Smith } else { 318126fbe8dcSKarl Rupp for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 3182887ee2caSBarry Smith ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 3183887ee2caSBarry Smith } 318420f1ed55SBarry Smith ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 31853649974fSBarry Smith ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 31863649974fSBarry Smith ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 318720f1ed55SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 318820f1ed55SBarry Smith } 3189fa22f6d0SBarry Smith 3190fa22f6d0SBarry Smith /* multiply off-diagonal portion of matrix */ 3191fa22f6d0SBarry Smith ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 3192fa22f6d0SBarry Smith ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 3193fa22f6d0SBarry Smith ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 3194fa22f6d0SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3195fa22f6d0SBarry Smith ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 3196fa22f6d0SBarry Smith ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 3197fa22f6d0SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3198fa22f6d0SBarry Smith ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3199fa22f6d0SBarry Smith ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3200effcda25SBarry Smith ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 3201fa22f6d0SBarry Smith 3202fa22f6d0SBarry Smith /* local sweep */ 320341f059aeSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 3204fa22f6d0SBarry Smith ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 3205f23aa3ddSBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 3206ffe4fb16SHong Zhang PetscFunctionReturn(0); 3207ffe4fb16SHong Zhang } 3208ffe4fb16SHong Zhang 320941f059aeSBarry Smith PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 3210ffe4fb16SHong Zhang { 3211ffe4fb16SHong Zhang Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 3212dfbe8321SBarry Smith PetscErrorCode ierr; 32132798e883SHong Zhang Vec lvec1,bb1; 32142798e883SHong Zhang 32152798e883SHong Zhang PetscFunctionBegin; 3216e32f2f54SBarry Smith if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 3217e32f2f54SBarry Smith if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 32182798e883SHong Zhang 3219c14dc6b6SHong Zhang if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 32202798e883SHong Zhang if (flag & SOR_ZERO_INITIAL_GUESS) { 322141f059aeSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 32222798e883SHong Zhang its--; 32232798e883SHong Zhang } 32242798e883SHong Zhang 32252798e883SHong Zhang ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 32262798e883SHong Zhang ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 32272798e883SHong Zhang while (its--) { 3228ca9f406cSSatish Balay ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 32292798e883SHong Zhang 32302798e883SHong Zhang /* lower diagonal part: bb1 = bb - B^T*xx */ 32312798e883SHong Zhang ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 3232efb30889SBarry Smith ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 32332798e883SHong Zhang 3234ca9f406cSSatish Balay ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 32352798e883SHong Zhang ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 3236ca9f406cSSatish Balay ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 32372798e883SHong Zhang 32382798e883SHong Zhang /* upper diagonal part: bb1 = bb1 - B*x */ 3239efb30889SBarry Smith ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 32402798e883SHong Zhang ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 32412798e883SHong Zhang 3242ca9f406cSSatish Balay ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 32432798e883SHong Zhang 3244c14dc6b6SHong Zhang /* diagonal sweep */ 324541f059aeSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 32462798e883SHong Zhang } 32476bf464f9SBarry Smith ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 32486bf464f9SBarry Smith ierr = VecDestroy(&bb1);CHKERRQ(ierr); 32496bf464f9SBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 32502798e883SHong Zhang PetscFunctionReturn(0); 32512798e883SHong Zhang } 32522798e883SHong Zhang 3253dfb205c3SBarry Smith /*@ 3254dfb205c3SBarry Smith MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard 3255dfb205c3SBarry Smith CSR format the local rows. 3256dfb205c3SBarry Smith 3257dfb205c3SBarry Smith Collective on MPI_Comm 3258dfb205c3SBarry Smith 3259dfb205c3SBarry Smith Input Parameters: 3260dfb205c3SBarry Smith + comm - MPI communicator 3261dfb205c3SBarry Smith . bs - the block size, only a block size of 1 is supported 3262dfb205c3SBarry Smith . m - number of local rows (Cannot be PETSC_DECIDE) 3263dfb205c3SBarry Smith . n - This value should be the same as the local size used in creating the 3264dfb205c3SBarry Smith x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3265dfb205c3SBarry Smith calculated if N is given) For square matrices n is almost always m. 3266dfb205c3SBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3267dfb205c3SBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3268483a2f95SBarry Smith . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix 3269dfb205c3SBarry Smith . j - column indices 3270dfb205c3SBarry Smith - a - matrix values 3271dfb205c3SBarry Smith 3272dfb205c3SBarry Smith Output Parameter: 3273dfb205c3SBarry Smith . mat - the matrix 3274dfb205c3SBarry Smith 3275dfb205c3SBarry Smith Level: intermediate 3276dfb205c3SBarry Smith 3277dfb205c3SBarry Smith Notes: 3278dfb205c3SBarry Smith The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3279dfb205c3SBarry Smith thus you CANNOT change the matrix entries by changing the values of a[] after you have 3280dfb205c3SBarry Smith called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3281dfb205c3SBarry Smith 3282dfb205c3SBarry Smith The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3283dfb205c3SBarry Smith 3284dfb205c3SBarry Smith .keywords: matrix, aij, compressed row, sparse, parallel 3285dfb205c3SBarry Smith 3286dfb205c3SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 328769b1f4b7SBarry Smith MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3288dfb205c3SBarry Smith @*/ 32897087cfbeSBarry Smith PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3290dfb205c3SBarry Smith { 3291dfb205c3SBarry Smith PetscErrorCode ierr; 3292dfb205c3SBarry Smith 3293dfb205c3SBarry Smith 3294dfb205c3SBarry Smith PetscFunctionBegin; 3295f23aa3ddSBarry Smith if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3296dfb205c3SBarry Smith if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3297dfb205c3SBarry Smith ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3298dfb205c3SBarry Smith ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3299dfb205c3SBarry Smith ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 3300dfb205c3SBarry Smith ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 3301dfb205c3SBarry Smith PetscFunctionReturn(0); 3302dfb205c3SBarry Smith } 3303dfb205c3SBarry Smith 3304dfb205c3SBarry Smith 3305dfb205c3SBarry Smith /*@C 3306dfb205c3SBarry Smith MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 3307dfb205c3SBarry Smith (the default parallel PETSc format). 3308dfb205c3SBarry Smith 3309dfb205c3SBarry Smith Collective on MPI_Comm 3310dfb205c3SBarry Smith 3311dfb205c3SBarry Smith Input Parameters: 33121c4f3114SJed Brown + B - the matrix 3313dfb205c3SBarry Smith . bs - the block size 3314dfb205c3SBarry Smith . i - the indices into j for the start of each local row (starts with zero) 3315dfb205c3SBarry Smith . j - the column indices for each local row (starts with zero) these must be sorted for each row 3316dfb205c3SBarry Smith - v - optional values in the matrix 3317dfb205c3SBarry Smith 3318dfb205c3SBarry Smith Level: developer 3319dfb205c3SBarry Smith 3320dfb205c3SBarry Smith .keywords: matrix, aij, compressed row, sparse, parallel 3321dfb205c3SBarry Smith 332269b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 3323dfb205c3SBarry Smith @*/ 33247087cfbeSBarry Smith PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3325dfb205c3SBarry Smith { 33264ac538c5SBarry Smith PetscErrorCode ierr; 3327dfb205c3SBarry Smith 3328dfb205c3SBarry Smith PetscFunctionBegin; 33294ac538c5SBarry Smith ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3330dfb205c3SBarry Smith PetscFunctionReturn(0); 3331dfb205c3SBarry Smith } 3332dfb205c3SBarry Smith 333310c56fdeSHong Zhang PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 33344dcd73b1SHong Zhang { 33354dcd73b1SHong Zhang PetscErrorCode ierr; 333610c56fdeSHong Zhang PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 333710c56fdeSHong Zhang PetscInt *indx; 333810c56fdeSHong Zhang PetscScalar *values; 3339dfb205c3SBarry Smith 33404dcd73b1SHong Zhang PetscFunctionBegin; 33414dcd73b1SHong Zhang ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 334210c56fdeSHong Zhang if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 334310c56fdeSHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data; 334410c56fdeSHong Zhang PetscInt *dnz,*onz,sum,bs,cbs,mbs,Nbs; 334510c56fdeSHong Zhang PetscInt *bindx,rmax=a->rmax,j; 33464dcd73b1SHong Zhang 334710c56fdeSHong Zhang ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 334810c56fdeSHong Zhang mbs = m/bs; Nbs = N/cbs; 334910c56fdeSHong Zhang if (n == PETSC_DECIDE) { 335010c56fdeSHong Zhang ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr); 335110c56fdeSHong Zhang } 335210c56fdeSHong Zhang /* Check sum(n) = Nbs */ 3353b2566f29SBarry Smith ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 335410c56fdeSHong Zhang if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs); 335510c56fdeSHong Zhang 335610c56fdeSHong Zhang ierr = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 335710c56fdeSHong Zhang rstart -= mbs; 33584dcd73b1SHong Zhang 33594dcd73b1SHong Zhang ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 336010c56fdeSHong Zhang ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr); 3361e491d569SHong Zhang ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 336210c56fdeSHong Zhang for (i=0; i<mbs; i++) { 33634dcd73b1SHong Zhang ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 33644dcd73b1SHong Zhang nnz = nnz/bs; 33654dcd73b1SHong Zhang for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 33664dcd73b1SHong Zhang ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 33674dcd73b1SHong Zhang ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 33684dcd73b1SHong Zhang } 3369e491d569SHong Zhang ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 33704dcd73b1SHong Zhang ierr = PetscFree(bindx);CHKERRQ(ierr); 33714dcd73b1SHong Zhang 33724dcd73b1SHong Zhang ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 337310c56fdeSHong Zhang ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 33744dcd73b1SHong Zhang ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 33754dcd73b1SHong Zhang ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr); 33764dcd73b1SHong Zhang ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 33774dcd73b1SHong Zhang ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 33784dcd73b1SHong Zhang } 33794dcd73b1SHong Zhang 338010c56fdeSHong Zhang /* numeric phase */ 33814dcd73b1SHong Zhang ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 338210c56fdeSHong Zhang ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 33834dcd73b1SHong Zhang 3384e491d569SHong Zhang ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 33854dcd73b1SHong Zhang for (i=0; i<m; i++) { 33864dcd73b1SHong Zhang ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 33874dcd73b1SHong Zhang Ii = i + rstart; 338810c56fdeSHong Zhang ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 33894dcd73b1SHong Zhang ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 33904dcd73b1SHong Zhang } 3391e491d569SHong Zhang ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 339210c56fdeSHong Zhang ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 339310c56fdeSHong Zhang ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 33944dcd73b1SHong Zhang PetscFunctionReturn(0); 33954dcd73b1SHong Zhang } 3396