1a30f8f8cSSatish Balay 2c6db04a5SJed Brown #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3c6db04a5SJed Brown #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h> 5c6db04a5SJed Brown #include <petscblaslapack.h> 6a30f8f8cSSatish Balay 76214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 8cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 96214f412SHong Zhang #endif 10b147fbf3SStefano Zampini 11b147fbf3SStefano Zampini /* This could be moved to matimpl.h */ 12b147fbf3SStefano Zampini static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill) 13b147fbf3SStefano Zampini { 14b147fbf3SStefano Zampini Mat preallocator; 15b147fbf3SStefano Zampini PetscInt r,rstart,rend; 16b147fbf3SStefano Zampini PetscInt bs,i,m,n,M,N; 17b147fbf3SStefano Zampini PetscBool cong = PETSC_TRUE; 18b147fbf3SStefano Zampini PetscErrorCode ierr; 19b147fbf3SStefano Zampini 20b147fbf3SStefano Zampini PetscFunctionBegin; 21b147fbf3SStefano Zampini PetscValidHeaderSpecific(B,MAT_CLASSID,1); 22b147fbf3SStefano Zampini PetscValidLogicalCollectiveInt(B,nm,2); 23b147fbf3SStefano Zampini for (i = 0; i < nm; i++) { 24b147fbf3SStefano Zampini PetscValidHeaderSpecific(X[i],MAT_CLASSID,3); 25b147fbf3SStefano Zampini ierr = PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);CHKERRQ(ierr); 26b147fbf3SStefano Zampini if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts"); 27b147fbf3SStefano Zampini } 28b147fbf3SStefano Zampini PetscValidLogicalCollectiveBool(B,fill,5); 29b147fbf3SStefano Zampini ierr = MatGetBlockSize(B,&bs);CHKERRQ(ierr); 30b147fbf3SStefano Zampini ierr = MatGetSize(B,&M,&N);CHKERRQ(ierr); 31b147fbf3SStefano Zampini ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr); 32b147fbf3SStefano Zampini ierr = MatCreate(PetscObjectComm((PetscObject)B),&preallocator);CHKERRQ(ierr); 33b147fbf3SStefano Zampini ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr); 34b147fbf3SStefano Zampini ierr = MatSetBlockSize(preallocator,bs);CHKERRQ(ierr); 35b147fbf3SStefano Zampini ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr); 36b147fbf3SStefano Zampini ierr = MatSetUp(preallocator);CHKERRQ(ierr); 37b147fbf3SStefano Zampini ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr); 38b147fbf3SStefano Zampini for (r = rstart; r < rend; ++r) { 39b147fbf3SStefano Zampini PetscInt ncols; 40b147fbf3SStefano Zampini const PetscInt *row; 41b147fbf3SStefano Zampini const PetscScalar *vals; 42b147fbf3SStefano Zampini 43b147fbf3SStefano Zampini for (i = 0; i < nm; i++) { 44b147fbf3SStefano Zampini ierr = MatGetRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr); 45b147fbf3SStefano Zampini ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr); 46b147fbf3SStefano Zampini if (symm && symm[i]) { 47b147fbf3SStefano Zampini ierr = MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr); 48b147fbf3SStefano Zampini } 49b147fbf3SStefano Zampini ierr = MatRestoreRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr); 50b147fbf3SStefano Zampini } 51b147fbf3SStefano Zampini } 52b147fbf3SStefano Zampini ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 53b147fbf3SStefano Zampini ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 54b147fbf3SStefano Zampini ierr = MatPreallocatorPreallocate(preallocator,fill,B);CHKERRQ(ierr); 55b147fbf3SStefano Zampini ierr = MatDestroy(&preallocator);CHKERRQ(ierr); 56b147fbf3SStefano Zampini PetscFunctionReturn(0); 57b147fbf3SStefano Zampini } 58b147fbf3SStefano Zampini 5928d58a37SPierre Jolivet PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 60b147fbf3SStefano Zampini { 61b147fbf3SStefano Zampini Mat B; 62b147fbf3SStefano Zampini PetscErrorCode ierr; 63b147fbf3SStefano Zampini PetscInt r; 64b147fbf3SStefano Zampini 65b147fbf3SStefano Zampini PetscFunctionBegin; 66b147fbf3SStefano Zampini if (reuse != MAT_REUSE_MATRIX) { 6728d58a37SPierre Jolivet PetscBool symm = PETSC_TRUE,isdense; 68b147fbf3SStefano Zampini PetscInt bs; 69b147fbf3SStefano Zampini 70b147fbf3SStefano Zampini ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 71b147fbf3SStefano Zampini ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 72b147fbf3SStefano Zampini ierr = MatSetType(B,newtype);CHKERRQ(ierr); 73b147fbf3SStefano Zampini ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 74b147fbf3SStefano Zampini ierr = MatSetBlockSize(B,bs);CHKERRQ(ierr); 75b147fbf3SStefano Zampini ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 76b147fbf3SStefano Zampini ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 7728d58a37SPierre Jolivet ierr = PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,"");CHKERRQ(ierr); 7828d58a37SPierre Jolivet if (!isdense) { 79b147fbf3SStefano Zampini ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 80b147fbf3SStefano Zampini ierr = MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);CHKERRQ(ierr); 81b147fbf3SStefano Zampini ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 8228d58a37SPierre Jolivet } else { 8328d58a37SPierre Jolivet ierr = MatSetUp(B);CHKERRQ(ierr); 8428d58a37SPierre Jolivet } 8528d58a37SPierre Jolivet } else { 8628d58a37SPierre Jolivet B = *newmat; 8728d58a37SPierre Jolivet ierr = MatZeroEntries(B);CHKERRQ(ierr); 8828d58a37SPierre Jolivet } 89b147fbf3SStefano Zampini 90b147fbf3SStefano Zampini ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 91b147fbf3SStefano Zampini for (r = A->rmap->rstart; r < A->rmap->rend; r++) { 92b147fbf3SStefano Zampini PetscInt ncols; 93b147fbf3SStefano Zampini const PetscInt *row; 94b147fbf3SStefano Zampini const PetscScalar *vals; 95b147fbf3SStefano Zampini 96b147fbf3SStefano Zampini ierr = MatGetRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr); 97b147fbf3SStefano Zampini ierr = MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr); 98eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX) 99eb1ec7c1SStefano Zampini if (A->hermitian) { 100eb1ec7c1SStefano Zampini PetscInt i; 101eb1ec7c1SStefano Zampini for (i = 0; i < ncols; i++) { 102eb1ec7c1SStefano Zampini ierr = MatSetValue(B,row[i],r,PetscConj(vals[i]),INSERT_VALUES);CHKERRQ(ierr); 103eb1ec7c1SStefano Zampini } 104eb1ec7c1SStefano Zampini } else { 105b147fbf3SStefano Zampini ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr); 106eb1ec7c1SStefano Zampini } 107eb1ec7c1SStefano Zampini #else 108eb1ec7c1SStefano Zampini ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr); 109eb1ec7c1SStefano Zampini #endif 110b147fbf3SStefano Zampini ierr = MatRestoreRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr); 111b147fbf3SStefano Zampini } 112b147fbf3SStefano Zampini ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 113b147fbf3SStefano Zampini ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 114b147fbf3SStefano Zampini ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 115b147fbf3SStefano Zampini 116b147fbf3SStefano Zampini if (reuse == MAT_INPLACE_MATRIX) { 117b147fbf3SStefano Zampini ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 118b147fbf3SStefano Zampini } else { 119b147fbf3SStefano Zampini *newmat = B; 120b147fbf3SStefano Zampini } 121b147fbf3SStefano Zampini PetscFunctionReturn(0); 122b147fbf3SStefano Zampini } 123b147fbf3SStefano Zampini 1247087cfbeSBarry Smith PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 125a30f8f8cSSatish Balay { 126f3566a2aSHong Zhang Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 127dfbe8321SBarry Smith PetscErrorCode ierr; 128a30f8f8cSSatish Balay 129a30f8f8cSSatish Balay PetscFunctionBegin; 130a30f8f8cSSatish Balay ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 131a30f8f8cSSatish Balay ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 132a30f8f8cSSatish Balay PetscFunctionReturn(0); 133a30f8f8cSSatish Balay } 134a30f8f8cSSatish Balay 1357087cfbeSBarry Smith PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 136a30f8f8cSSatish Balay { 137f3566a2aSHong Zhang Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 138dfbe8321SBarry Smith PetscErrorCode ierr; 139a30f8f8cSSatish Balay 140a30f8f8cSSatish Balay PetscFunctionBegin; 141a30f8f8cSSatish Balay ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 142a30f8f8cSSatish Balay ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 143a30f8f8cSSatish Balay PetscFunctionReturn(0); 144a30f8f8cSSatish Balay } 145a30f8f8cSSatish Balay 146d40312a9SBarry Smith #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 147a30f8f8cSSatish Balay { \ 148a30f8f8cSSatish Balay brow = row/bs; \ 149a30f8f8cSSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 150a30f8f8cSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 151a30f8f8cSSatish Balay bcol = col/bs; \ 152a30f8f8cSSatish Balay ridx = row % bs; cidx = col % bs; \ 153a30f8f8cSSatish Balay low = 0; high = nrow; \ 154a30f8f8cSSatish Balay while (high-low > 3) { \ 155a30f8f8cSSatish Balay t = (low+high)/2; \ 156a30f8f8cSSatish Balay if (rp[t] > bcol) high = t; \ 157a30f8f8cSSatish Balay else low = t; \ 158a30f8f8cSSatish Balay } \ 159a30f8f8cSSatish Balay for (_i=low; _i<high; _i++) { \ 160a30f8f8cSSatish Balay if (rp[_i] > bcol) break; \ 161a30f8f8cSSatish Balay if (rp[_i] == bcol) { \ 162a30f8f8cSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 163a30f8f8cSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 164a30f8f8cSSatish Balay else *bap = value; \ 165a30f8f8cSSatish Balay goto a_noinsert; \ 166a30f8f8cSSatish Balay } \ 167a30f8f8cSSatish Balay } \ 168a30f8f8cSSatish Balay if (a->nonew == 1) goto a_noinsert; \ 169d40312a9SBarry Smith if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 170fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 171a30f8f8cSSatish Balay N = nrow++ - 1; \ 172a30f8f8cSSatish Balay /* shift up all the later entries in this row */ \ 173580bdb30SBarry Smith ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \ 174580bdb30SBarry Smith ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \ 175580bdb30SBarry Smith ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \ 176a30f8f8cSSatish Balay rp[_i] = bcol; \ 177a30f8f8cSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 178e56f5c9eSBarry Smith A->nonzerostate++;\ 179a30f8f8cSSatish Balay a_noinsert:; \ 180a30f8f8cSSatish Balay ailen[brow] = nrow; \ 181a30f8f8cSSatish Balay } 182e5e170daSBarry Smith 183d40312a9SBarry Smith #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 184a30f8f8cSSatish Balay { \ 185a30f8f8cSSatish Balay brow = row/bs; \ 186a30f8f8cSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 187a30f8f8cSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 188a30f8f8cSSatish Balay bcol = col/bs; \ 189a30f8f8cSSatish Balay ridx = row % bs; cidx = col % bs; \ 190a30f8f8cSSatish Balay low = 0; high = nrow; \ 191a30f8f8cSSatish Balay while (high-low > 3) { \ 192a30f8f8cSSatish Balay t = (low+high)/2; \ 193a30f8f8cSSatish Balay if (rp[t] > bcol) high = t; \ 194a30f8f8cSSatish Balay else low = t; \ 195a30f8f8cSSatish Balay } \ 196a30f8f8cSSatish Balay for (_i=low; _i<high; _i++) { \ 197a30f8f8cSSatish Balay if (rp[_i] > bcol) break; \ 198a30f8f8cSSatish Balay if (rp[_i] == bcol) { \ 199a30f8f8cSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 200a30f8f8cSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 201a30f8f8cSSatish Balay else *bap = value; \ 202a30f8f8cSSatish Balay goto b_noinsert; \ 203a30f8f8cSSatish Balay } \ 204a30f8f8cSSatish Balay } \ 205a30f8f8cSSatish Balay if (b->nonew == 1) goto b_noinsert; \ 206d40312a9SBarry Smith if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 207fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 208a30f8f8cSSatish Balay N = nrow++ - 1; \ 209a30f8f8cSSatish Balay /* shift up all the later entries in this row */ \ 210580bdb30SBarry Smith ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \ 211580bdb30SBarry Smith ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \ 212580bdb30SBarry Smith ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \ 213a30f8f8cSSatish Balay rp[_i] = bcol; \ 214a30f8f8cSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 215e56f5c9eSBarry Smith B->nonzerostate++;\ 216a30f8f8cSSatish Balay b_noinsert:; \ 217a30f8f8cSSatish Balay bilen[brow] = nrow; \ 218a30f8f8cSSatish Balay } 219a30f8f8cSSatish Balay 220a30f8f8cSSatish Balay /* Only add/insert a(i,j) with i<=j (blocks). 221476417e5SBarry Smith Any a(i,j) with i>j input by user is ingored or generates an error 222a30f8f8cSSatish Balay */ 223dd6ea824SBarry Smith PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 224a30f8f8cSSatish Balay { 225a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 226a30f8f8cSSatish Balay MatScalar value; 227ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented; 228dfbe8321SBarry Smith PetscErrorCode ierr; 2291302d50aSBarry Smith PetscInt i,j,row,col; 230d0f46423SBarry Smith PetscInt rstart_orig=mat->rmap->rstart; 231d0f46423SBarry Smith PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 232d0f46423SBarry Smith PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 233a30f8f8cSSatish Balay 234a30f8f8cSSatish Balay /* Some Variables required in the macro */ 235a30f8f8cSSatish Balay Mat A = baij->A; 236a30f8f8cSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 2371302d50aSBarry Smith PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 238a30f8f8cSSatish Balay MatScalar *aa =a->a; 239a30f8f8cSSatish Balay 240a30f8f8cSSatish Balay Mat B = baij->B; 241a30f8f8cSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 2421302d50aSBarry Smith PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 243a30f8f8cSSatish Balay MatScalar *ba =b->a; 244a30f8f8cSSatish Balay 2451302d50aSBarry Smith PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 2461302d50aSBarry Smith PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 247a30f8f8cSSatish Balay MatScalar *ap,*bap; 248a30f8f8cSSatish Balay 249a30f8f8cSSatish Balay /* for stash */ 2500298fd71SBarry Smith PetscInt n_loc, *in_loc = NULL; 2510298fd71SBarry Smith MatScalar *v_loc = NULL; 252a30f8f8cSSatish Balay 253a30f8f8cSSatish Balay PetscFunctionBegin; 254a30f8f8cSSatish Balay if (!baij->donotstash) { 25559ffdab8SBarry Smith if (n > baij->n_loc) { 25659ffdab8SBarry Smith ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 25759ffdab8SBarry Smith ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 258785e854fSJed Brown ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr); 259785e854fSJed Brown ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr); 26026fbe8dcSKarl Rupp 26159ffdab8SBarry Smith baij->n_loc = n; 26259ffdab8SBarry Smith } 26359ffdab8SBarry Smith in_loc = baij->in_loc; 26459ffdab8SBarry Smith v_loc = baij->v_loc; 265a30f8f8cSSatish Balay } 266a30f8f8cSSatish Balay 267a30f8f8cSSatish Balay for (i=0; i<m; i++) { 268a30f8f8cSSatish Balay if (im[i] < 0) continue; 2692515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 270e32f2f54SBarry Smith if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 271a30f8f8cSSatish Balay #endif 272a30f8f8cSSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 273a30f8f8cSSatish Balay row = im[i] - rstart_orig; /* local row index */ 274a30f8f8cSSatish Balay for (j=0; j<n; j++) { 27501b2bd88SHong Zhang if (im[i]/bs > in[j]/bs) { 27601b2bd88SHong Zhang if (a->ignore_ltriangular) { 27701b2bd88SHong Zhang continue; /* ignore lower triangular blocks */ 27826fbe8dcSKarl Rupp } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 27901b2bd88SHong Zhang } 280a30f8f8cSSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 281a30f8f8cSSatish Balay col = in[j] - cstart_orig; /* local col index */ 282a30f8f8cSSatish Balay brow = row/bs; bcol = col/bs; 283a30f8f8cSSatish Balay if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 284db4deed7SKarl Rupp if (roworiented) value = v[i*n+j]; 285db4deed7SKarl Rupp else value = v[i+j*m]; 286d40312a9SBarry Smith MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 287a30f8f8cSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 288a30f8f8cSSatish Balay } else if (in[j] < 0) continue; 2892515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 290cb9801acSJed Brown else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 291a30f8f8cSSatish Balay #endif 292a30f8f8cSSatish Balay else { /* off-diag entry (B) */ 293a30f8f8cSSatish Balay if (mat->was_assembled) { 294a30f8f8cSSatish Balay if (!baij->colmap) { 295ab9863d7SBarry Smith ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 296a30f8f8cSSatish Balay } 297a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE) 298a30f8f8cSSatish Balay ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 29971730473SSatish Balay col = col - 1; 300a30f8f8cSSatish Balay #else 30171730473SSatish Balay col = baij->colmap[in[j]/bs] - 1; 302a30f8f8cSSatish Balay #endif 303a30f8f8cSSatish Balay if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 304ab9863d7SBarry Smith ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 305a30f8f8cSSatish Balay col = in[j]; 306a30f8f8cSSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 307a30f8f8cSSatish Balay B = baij->B; 308a30f8f8cSSatish Balay b = (Mat_SeqBAIJ*)(B)->data; 309a30f8f8cSSatish Balay bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 310a30f8f8cSSatish Balay ba = b->a; 31171730473SSatish Balay } else col += in[j]%bs; 312a30f8f8cSSatish Balay } else col = in[j]; 313db4deed7SKarl Rupp if (roworiented) value = v[i*n+j]; 314db4deed7SKarl Rupp else value = v[i+j*m]; 315d40312a9SBarry Smith MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 316a30f8f8cSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 317a30f8f8cSSatish Balay } 318a30f8f8cSSatish Balay } 319a30f8f8cSSatish Balay } else { /* off processor entry */ 3204cb17eb5SBarry Smith if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 321a30f8f8cSSatish Balay if (!baij->donotstash) { 3225080c13bSMatthew G Knepley mat->assembled = PETSC_FALSE; 323a30f8f8cSSatish Balay n_loc = 0; 324a30f8f8cSSatish Balay for (j=0; j<n; j++) { 325f65c83cfSHong Zhang if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 326a30f8f8cSSatish Balay in_loc[n_loc] = in[j]; 327a30f8f8cSSatish Balay if (roworiented) { 328a30f8f8cSSatish Balay v_loc[n_loc] = v[i*n+j]; 329a30f8f8cSSatish Balay } else { 330a30f8f8cSSatish Balay v_loc[n_loc] = v[j*m+i]; 331a30f8f8cSSatish Balay } 332a30f8f8cSSatish Balay n_loc++; 333a30f8f8cSSatish Balay } 334b400d20cSBarry Smith ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 335a30f8f8cSSatish Balay } 336a30f8f8cSSatish Balay } 337a30f8f8cSSatish Balay } 338a30f8f8cSSatish Balay PetscFunctionReturn(0); 339a30f8f8cSSatish Balay } 340a30f8f8cSSatish Balay 34136bd2089SBarry Smith PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 34236bd2089SBarry Smith { 34336bd2089SBarry Smith Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 34436bd2089SBarry Smith PetscErrorCode ierr; 34536bd2089SBarry Smith PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 34636bd2089SBarry Smith PetscInt *imax =a->imax,*ai=a->i,*ailen=a->ilen; 34736bd2089SBarry Smith PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 34836bd2089SBarry Smith PetscBool roworiented=a->roworiented; 34936bd2089SBarry Smith const PetscScalar *value = v; 35036bd2089SBarry Smith MatScalar *ap,*aa = a->a,*bap; 35136bd2089SBarry Smith 35236bd2089SBarry Smith PetscFunctionBegin; 35336bd2089SBarry Smith if (col < row) { 35436bd2089SBarry Smith if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */ 35536bd2089SBarry Smith else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 35636bd2089SBarry Smith } 35736bd2089SBarry Smith rp = aj + ai[row]; 35836bd2089SBarry Smith ap = aa + bs2*ai[row]; 35936bd2089SBarry Smith rmax = imax[row]; 36036bd2089SBarry Smith nrow = ailen[row]; 36136bd2089SBarry Smith value = v; 36236bd2089SBarry Smith low = 0; 36336bd2089SBarry Smith high = nrow; 36436bd2089SBarry Smith 36536bd2089SBarry Smith while (high-low > 7) { 36636bd2089SBarry Smith t = (low+high)/2; 36736bd2089SBarry Smith if (rp[t] > col) high = t; 36836bd2089SBarry Smith else low = t; 36936bd2089SBarry Smith } 37036bd2089SBarry Smith for (i=low; i<high; i++) { 37136bd2089SBarry Smith if (rp[i] > col) break; 37236bd2089SBarry Smith if (rp[i] == col) { 37336bd2089SBarry Smith bap = ap + bs2*i; 37436bd2089SBarry Smith if (roworiented) { 37536bd2089SBarry Smith if (is == ADD_VALUES) { 37636bd2089SBarry Smith for (ii=0; ii<bs; ii++) { 37736bd2089SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 37836bd2089SBarry Smith bap[jj] += *value++; 37936bd2089SBarry Smith } 38036bd2089SBarry Smith } 38136bd2089SBarry Smith } else { 38236bd2089SBarry Smith for (ii=0; ii<bs; ii++) { 38336bd2089SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 38436bd2089SBarry Smith bap[jj] = *value++; 38536bd2089SBarry Smith } 38636bd2089SBarry Smith } 38736bd2089SBarry Smith } 38836bd2089SBarry Smith } else { 38936bd2089SBarry Smith if (is == ADD_VALUES) { 39036bd2089SBarry Smith for (ii=0; ii<bs; ii++) { 39136bd2089SBarry Smith for (jj=0; jj<bs; jj++) { 39236bd2089SBarry Smith *bap++ += *value++; 39336bd2089SBarry Smith } 39436bd2089SBarry Smith } 39536bd2089SBarry Smith } else { 39636bd2089SBarry Smith for (ii=0; ii<bs; ii++) { 39736bd2089SBarry Smith for (jj=0; jj<bs; jj++) { 39836bd2089SBarry Smith *bap++ = *value++; 39936bd2089SBarry Smith } 40036bd2089SBarry Smith } 40136bd2089SBarry Smith } 40236bd2089SBarry Smith } 40336bd2089SBarry Smith goto noinsert2; 40436bd2089SBarry Smith } 40536bd2089SBarry Smith } 40636bd2089SBarry Smith if (nonew == 1) goto noinsert2; 40736bd2089SBarry Smith if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol); 40836bd2089SBarry Smith MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 40936bd2089SBarry Smith N = nrow++ - 1; high++; 41036bd2089SBarry Smith /* shift up all the later entries in this row */ 411580bdb30SBarry Smith ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 412580bdb30SBarry Smith ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 41336bd2089SBarry Smith rp[i] = col; 41436bd2089SBarry Smith bap = ap + bs2*i; 41536bd2089SBarry Smith if (roworiented) { 41636bd2089SBarry Smith for (ii=0; ii<bs; ii++) { 41736bd2089SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 41836bd2089SBarry Smith bap[jj] = *value++; 41936bd2089SBarry Smith } 42036bd2089SBarry Smith } 42136bd2089SBarry Smith } else { 42236bd2089SBarry Smith for (ii=0; ii<bs; ii++) { 42336bd2089SBarry Smith for (jj=0; jj<bs; jj++) { 42436bd2089SBarry Smith *bap++ = *value++; 42536bd2089SBarry Smith } 42636bd2089SBarry Smith } 42736bd2089SBarry Smith } 42836bd2089SBarry Smith noinsert2:; 42936bd2089SBarry Smith ailen[row] = nrow; 43036bd2089SBarry Smith PetscFunctionReturn(0); 43136bd2089SBarry Smith } 43236bd2089SBarry Smith 43336bd2089SBarry Smith /* 43436bd2089SBarry Smith This routine is exactly duplicated in mpibaij.c 43536bd2089SBarry Smith */ 43636bd2089SBarry Smith PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 43736bd2089SBarry Smith { 43836bd2089SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 43936bd2089SBarry Smith PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 44036bd2089SBarry Smith PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 44136bd2089SBarry Smith PetscErrorCode ierr; 44236bd2089SBarry Smith PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 44336bd2089SBarry Smith PetscBool roworiented=a->roworiented; 44436bd2089SBarry Smith const PetscScalar *value = v; 44536bd2089SBarry Smith MatScalar *ap,*aa = a->a,*bap; 44636bd2089SBarry Smith 44736bd2089SBarry Smith PetscFunctionBegin; 44836bd2089SBarry Smith rp = aj + ai[row]; 44936bd2089SBarry Smith ap = aa + bs2*ai[row]; 45036bd2089SBarry Smith rmax = imax[row]; 45136bd2089SBarry Smith nrow = ailen[row]; 45236bd2089SBarry Smith low = 0; 45336bd2089SBarry Smith high = nrow; 45436bd2089SBarry Smith value = v; 45536bd2089SBarry Smith while (high-low > 7) { 45636bd2089SBarry Smith t = (low+high)/2; 45736bd2089SBarry Smith if (rp[t] > col) high = t; 45836bd2089SBarry Smith else low = t; 45936bd2089SBarry Smith } 46036bd2089SBarry Smith for (i=low; i<high; i++) { 46136bd2089SBarry Smith if (rp[i] > col) break; 46236bd2089SBarry Smith if (rp[i] == col) { 46336bd2089SBarry Smith bap = ap + bs2*i; 46436bd2089SBarry Smith if (roworiented) { 46536bd2089SBarry Smith if (is == ADD_VALUES) { 46636bd2089SBarry Smith for (ii=0; ii<bs; ii++) { 46736bd2089SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 46836bd2089SBarry Smith bap[jj] += *value++; 46936bd2089SBarry Smith } 47036bd2089SBarry Smith } 47136bd2089SBarry Smith } else { 47236bd2089SBarry Smith for (ii=0; ii<bs; ii++) { 47336bd2089SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 47436bd2089SBarry Smith bap[jj] = *value++; 47536bd2089SBarry Smith } 47636bd2089SBarry Smith } 47736bd2089SBarry Smith } 47836bd2089SBarry Smith } else { 47936bd2089SBarry Smith if (is == ADD_VALUES) { 48036bd2089SBarry Smith for (ii=0; ii<bs; ii++,value+=bs) { 48136bd2089SBarry Smith for (jj=0; jj<bs; jj++) { 48236bd2089SBarry Smith bap[jj] += value[jj]; 48336bd2089SBarry Smith } 48436bd2089SBarry Smith bap += bs; 48536bd2089SBarry Smith } 48636bd2089SBarry Smith } else { 48736bd2089SBarry Smith for (ii=0; ii<bs; ii++,value+=bs) { 48836bd2089SBarry Smith for (jj=0; jj<bs; jj++) { 48936bd2089SBarry Smith bap[jj] = value[jj]; 49036bd2089SBarry Smith } 49136bd2089SBarry Smith bap += bs; 49236bd2089SBarry Smith } 49336bd2089SBarry Smith } 49436bd2089SBarry Smith } 49536bd2089SBarry Smith goto noinsert2; 49636bd2089SBarry Smith } 49736bd2089SBarry Smith } 49836bd2089SBarry Smith if (nonew == 1) goto noinsert2; 49936bd2089SBarry Smith if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol); 50036bd2089SBarry Smith MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 50136bd2089SBarry Smith N = nrow++ - 1; high++; 50236bd2089SBarry Smith /* shift up all the later entries in this row */ 503580bdb30SBarry Smith ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 504580bdb30SBarry Smith ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 50536bd2089SBarry Smith rp[i] = col; 50636bd2089SBarry Smith bap = ap + bs2*i; 50736bd2089SBarry Smith if (roworiented) { 50836bd2089SBarry Smith for (ii=0; ii<bs; ii++) { 50936bd2089SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 51036bd2089SBarry Smith bap[jj] = *value++; 51136bd2089SBarry Smith } 51236bd2089SBarry Smith } 51336bd2089SBarry Smith } else { 51436bd2089SBarry Smith for (ii=0; ii<bs; ii++) { 51536bd2089SBarry Smith for (jj=0; jj<bs; jj++) { 51636bd2089SBarry Smith *bap++ = *value++; 51736bd2089SBarry Smith } 51836bd2089SBarry Smith } 51936bd2089SBarry Smith } 52036bd2089SBarry Smith noinsert2:; 52136bd2089SBarry Smith ailen[row] = nrow; 52236bd2089SBarry Smith PetscFunctionReturn(0); 52336bd2089SBarry Smith } 52436bd2089SBarry Smith 52536bd2089SBarry Smith /* 52636bd2089SBarry Smith This routine could be optimized by removing the need for the block copy below and passing stride information 52736bd2089SBarry Smith to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 52836bd2089SBarry Smith */ 529dd6ea824SBarry Smith PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 530a30f8f8cSSatish Balay { 5310880e062SHong Zhang Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 532f15d580aSBarry Smith const MatScalar *value; 533f15d580aSBarry Smith MatScalar *barray =baij->barray; 534ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 535dfbe8321SBarry Smith PetscErrorCode ierr; 536899cda47SBarry Smith PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 537476417e5SBarry Smith PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 538476417e5SBarry Smith PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 5390880e062SHong Zhang 540a30f8f8cSSatish Balay PetscFunctionBegin; 5410880e062SHong Zhang if (!barray) { 542785e854fSJed Brown ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 5430880e062SHong Zhang baij->barray = barray; 5440880e062SHong Zhang } 5450880e062SHong Zhang 5460880e062SHong Zhang if (roworiented) { 5470880e062SHong Zhang stepval = (n-1)*bs; 5480880e062SHong Zhang } else { 5490880e062SHong Zhang stepval = (m-1)*bs; 5500880e062SHong Zhang } 5510880e062SHong Zhang for (i=0; i<m; i++) { 5520880e062SHong Zhang if (im[i] < 0) continue; 5532515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 554bb003d0fSBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1); 5550880e062SHong Zhang #endif 5560880e062SHong Zhang if (im[i] >= rstart && im[i] < rend) { 5570880e062SHong Zhang row = im[i] - rstart; 5580880e062SHong Zhang for (j=0; j<n; j++) { 559f3f98c53SJed Brown if (im[i] > in[j]) { 560f3f98c53SJed Brown if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 561e32f2f54SBarry Smith else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 562f3f98c53SJed Brown } 5630880e062SHong Zhang /* If NumCol = 1 then a copy is not required */ 5640880e062SHong Zhang if ((roworiented) && (n == 1)) { 565f15d580aSBarry Smith barray = (MatScalar*) v + i*bs2; 5660880e062SHong Zhang } else if ((!roworiented) && (m == 1)) { 567f15d580aSBarry Smith barray = (MatScalar*) v + j*bs2; 5680880e062SHong Zhang } else { /* Here a copy is required */ 5690880e062SHong Zhang if (roworiented) { 5700880e062SHong Zhang value = v + i*(stepval+bs)*bs + j*bs; 5710880e062SHong Zhang } else { 5720880e062SHong Zhang value = v + j*(stepval+bs)*bs + i*bs; 5730880e062SHong Zhang } 5740880e062SHong Zhang for (ii=0; ii<bs; ii++,value+=stepval) { 5750880e062SHong Zhang for (jj=0; jj<bs; jj++) { 5760880e062SHong Zhang *barray++ = *value++; 5770880e062SHong Zhang } 5780880e062SHong Zhang } 5790880e062SHong Zhang barray -=bs2; 5800880e062SHong Zhang } 5810880e062SHong Zhang 5820880e062SHong Zhang if (in[j] >= cstart && in[j] < cend) { 5830880e062SHong Zhang col = in[j] - cstart; 58436bd2089SBarry Smith ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 58526fbe8dcSKarl Rupp } else if (in[j] < 0) continue; 5862515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 587bb003d0fSBarry Smith else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1); 5880880e062SHong Zhang #endif 5890880e062SHong Zhang else { 5900880e062SHong Zhang if (mat->was_assembled) { 5910880e062SHong Zhang if (!baij->colmap) { 592ab9863d7SBarry Smith ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 5930880e062SHong Zhang } 5940880e062SHong Zhang 5952515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 5960880e062SHong Zhang #if defined(PETSC_USE_CTABLE) 5971302d50aSBarry Smith { PetscInt data; 5980880e062SHong Zhang ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 599e32f2f54SBarry Smith if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 6000880e062SHong Zhang } 6010880e062SHong Zhang #else 602e32f2f54SBarry Smith if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 6030880e062SHong Zhang #endif 6040880e062SHong Zhang #endif 6050880e062SHong Zhang #if defined(PETSC_USE_CTABLE) 6060880e062SHong Zhang ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6070880e062SHong Zhang col = (col - 1)/bs; 6080880e062SHong Zhang #else 6090880e062SHong Zhang col = (baij->colmap[in[j]] - 1)/bs; 6100880e062SHong Zhang #endif 6110880e062SHong Zhang if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 612ab9863d7SBarry Smith ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 6130880e062SHong Zhang col = in[j]; 6140880e062SHong Zhang } 61526fbe8dcSKarl Rupp } else col = in[j]; 61636bd2089SBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 6170880e062SHong Zhang } 6180880e062SHong Zhang } 6190880e062SHong Zhang } else { 620bb003d0fSBarry Smith if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 6210880e062SHong Zhang if (!baij->donotstash) { 6220880e062SHong Zhang if (roworiented) { 6230880e062SHong Zhang ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 6240880e062SHong Zhang } else { 6250880e062SHong Zhang ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 6260880e062SHong Zhang } 6270880e062SHong Zhang } 6280880e062SHong Zhang } 6290880e062SHong Zhang } 6300880e062SHong Zhang PetscFunctionReturn(0); 631a30f8f8cSSatish Balay } 632a30f8f8cSSatish Balay 6331302d50aSBarry Smith PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 634a30f8f8cSSatish Balay { 635f3566a2aSHong Zhang Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 6366849ba73SBarry Smith PetscErrorCode ierr; 637d0f46423SBarry Smith PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 638d0f46423SBarry Smith PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 639a30f8f8cSSatish Balay 640a30f8f8cSSatish Balay PetscFunctionBegin; 641a30f8f8cSSatish Balay for (i=0; i<m; i++) { 642e32f2f54SBarry Smith if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 643e32f2f54SBarry Smith if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 644a30f8f8cSSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 645a30f8f8cSSatish Balay row = idxm[i] - bsrstart; 646a30f8f8cSSatish Balay for (j=0; j<n; j++) { 647e32f2f54SBarry Smith if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 648e32f2f54SBarry Smith if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 649a30f8f8cSSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend) { 650a30f8f8cSSatish Balay col = idxn[j] - bscstart; 651c8407628SSatish Balay ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 652a30f8f8cSSatish Balay } else { 653a30f8f8cSSatish Balay if (!baij->colmap) { 654ab9863d7SBarry Smith ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 655a30f8f8cSSatish Balay } 656a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE) 657a30f8f8cSSatish Balay ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 658a30f8f8cSSatish Balay data--; 659a30f8f8cSSatish Balay #else 660a30f8f8cSSatish Balay data = baij->colmap[idxn[j]/bs]-1; 661a30f8f8cSSatish Balay #endif 662a30f8f8cSSatish Balay if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 663a30f8f8cSSatish Balay else { 664a30f8f8cSSatish Balay col = data + idxn[j]%bs; 665e249d750SSatish Balay ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 666a30f8f8cSSatish Balay } 667a30f8f8cSSatish Balay } 668a30f8f8cSSatish Balay } 669f23aa3ddSBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 670a30f8f8cSSatish Balay } 671a30f8f8cSSatish Balay PetscFunctionReturn(0); 672a30f8f8cSSatish Balay } 673a30f8f8cSSatish Balay 674dfbe8321SBarry Smith PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 675a30f8f8cSSatish Balay { 676a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 677dfbe8321SBarry Smith PetscErrorCode ierr; 678a30f8f8cSSatish Balay PetscReal sum[2],*lnorm2; 679a30f8f8cSSatish Balay 680a30f8f8cSSatish Balay PetscFunctionBegin; 681a30f8f8cSSatish Balay if (baij->size == 1) { 682a30f8f8cSSatish Balay ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 683a30f8f8cSSatish Balay } else { 684a30f8f8cSSatish Balay if (type == NORM_FROBENIUS) { 685785e854fSJed Brown ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr); 686a30f8f8cSSatish Balay ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 687a30f8f8cSSatish Balay *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 688a30f8f8cSSatish Balay ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 689a30f8f8cSSatish Balay *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 690b2566f29SBarry Smith ierr = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6918f1a2a5eSBarry Smith *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 692a30f8f8cSSatish Balay ierr = PetscFree(lnorm2);CHKERRQ(ierr); 6930b8dc8d2SHong Zhang } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 6940b8dc8d2SHong Zhang Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 6950b8dc8d2SHong Zhang Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 6960b8dc8d2SHong Zhang PetscReal *rsum,*rsum2,vabs; 697899cda47SBarry Smith PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 698d0f46423SBarry Smith PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 6990b8dc8d2SHong Zhang MatScalar *v; 7000b8dc8d2SHong Zhang 701dcca6d9dSJed Brown ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr); 702580bdb30SBarry Smith ierr = PetscArrayzero(rsum,mat->cmap->N);CHKERRQ(ierr); 7030b8dc8d2SHong Zhang /* Amat */ 7040b8dc8d2SHong Zhang v = amat->a; jj = amat->j; 7050b8dc8d2SHong Zhang for (brow=0; brow<mbs; brow++) { 7060b8dc8d2SHong Zhang grow = bs*(rstart + brow); 7070b8dc8d2SHong Zhang nz = amat->i[brow+1] - amat->i[brow]; 7080b8dc8d2SHong Zhang for (bcol=0; bcol<nz; bcol++) { 7090b8dc8d2SHong Zhang gcol = bs*(rstart + *jj); jj++; 7100b8dc8d2SHong Zhang for (col=0; col<bs; col++) { 7110b8dc8d2SHong Zhang for (row=0; row<bs; row++) { 7120b8dc8d2SHong Zhang vabs = PetscAbsScalar(*v); v++; 7130b8dc8d2SHong Zhang rsum[gcol+col] += vabs; 7140b8dc8d2SHong Zhang /* non-diagonal block */ 7150b8dc8d2SHong Zhang if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 7160b8dc8d2SHong Zhang } 7170b8dc8d2SHong Zhang } 7180b8dc8d2SHong Zhang } 71951f70360SJed Brown ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 7200b8dc8d2SHong Zhang } 7210b8dc8d2SHong Zhang /* Bmat */ 7220b8dc8d2SHong Zhang v = bmat->a; jj = bmat->j; 7230b8dc8d2SHong Zhang for (brow=0; brow<mbs; brow++) { 7240b8dc8d2SHong Zhang grow = bs*(rstart + brow); 7250b8dc8d2SHong Zhang nz = bmat->i[brow+1] - bmat->i[brow]; 7260b8dc8d2SHong Zhang for (bcol=0; bcol<nz; bcol++) { 7270b8dc8d2SHong Zhang gcol = bs*garray[*jj]; jj++; 7280b8dc8d2SHong Zhang for (col=0; col<bs; col++) { 7290b8dc8d2SHong Zhang for (row=0; row<bs; row++) { 7300b8dc8d2SHong Zhang vabs = PetscAbsScalar(*v); v++; 7310b8dc8d2SHong Zhang rsum[gcol+col] += vabs; 7320b8dc8d2SHong Zhang rsum[grow+row] += vabs; 7330b8dc8d2SHong Zhang } 7340b8dc8d2SHong Zhang } 7350b8dc8d2SHong Zhang } 73651f70360SJed Brown ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 7370b8dc8d2SHong Zhang } 738b2566f29SBarry Smith ierr = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 7390b8dc8d2SHong Zhang *norm = 0.0; 740d0f46423SBarry Smith for (col=0; col<mat->cmap->N; col++) { 7410b8dc8d2SHong Zhang if (rsum2[col] > *norm) *norm = rsum2[col]; 7420b8dc8d2SHong Zhang } 74374ed9c26SBarry Smith ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 744f23aa3ddSBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 745a30f8f8cSSatish Balay } 746a30f8f8cSSatish Balay PetscFunctionReturn(0); 747a30f8f8cSSatish Balay } 748a30f8f8cSSatish Balay 749dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 750a30f8f8cSSatish Balay { 751a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 752dfbe8321SBarry Smith PetscErrorCode ierr; 7531302d50aSBarry Smith PetscInt nstash,reallocs; 754a30f8f8cSSatish Balay 755a30f8f8cSSatish Balay PetscFunctionBegin; 75626fbe8dcSKarl Rupp if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 757a30f8f8cSSatish Balay 758d0f46423SBarry Smith ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 7591e2582c4SBarry Smith ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 760a30f8f8cSSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 7611e2582c4SBarry Smith ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 762a30f8f8cSSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 7631e2582c4SBarry Smith ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 764a30f8f8cSSatish Balay PetscFunctionReturn(0); 765a30f8f8cSSatish Balay } 766a30f8f8cSSatish Balay 767dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 768a30f8f8cSSatish Balay { 769a30f8f8cSSatish Balay Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 770a30f8f8cSSatish Balay Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data; 7716849ba73SBarry Smith PetscErrorCode ierr; 77213f74950SBarry Smith PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 773e44c0bd4SBarry Smith PetscInt *row,*col; 774ace3abfcSBarry Smith PetscBool other_disassembled; 77513f74950SBarry Smith PetscMPIInt n; 776ace3abfcSBarry Smith PetscBool r1,r2,r3; 777a30f8f8cSSatish Balay MatScalar *val; 778a30f8f8cSSatish Balay 77991c97fd4SSatish Balay /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 780a30f8f8cSSatish Balay PetscFunctionBegin; 7814cb17eb5SBarry Smith if (!baij->donotstash && !mat->nooffprocentries) { 782a30f8f8cSSatish Balay while (1) { 783a30f8f8cSSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 784a30f8f8cSSatish Balay if (!flg) break; 785a30f8f8cSSatish Balay 786a30f8f8cSSatish Balay for (i=0; i<n;) { 787a30f8f8cSSatish Balay /* Now identify the consecutive vals belonging to the same row */ 78826fbe8dcSKarl Rupp for (j=i,rstart=row[j]; j<n; j++) { 78926fbe8dcSKarl Rupp if (row[j] != rstart) break; 79026fbe8dcSKarl Rupp } 791a30f8f8cSSatish Balay if (j < n) ncols = j-i; 792a30f8f8cSSatish Balay else ncols = n-i; 793a30f8f8cSSatish Balay /* Now assemble all these values with a single function call */ 7944b4eb8d3SJed Brown ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 795a30f8f8cSSatish Balay i = j; 796a30f8f8cSSatish Balay } 797a30f8f8cSSatish Balay } 798a30f8f8cSSatish Balay ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 799a30f8f8cSSatish Balay /* Now process the block-stash. Since the values are stashed column-oriented, 800a30f8f8cSSatish Balay set the roworiented flag to column oriented, and after MatSetValues() 801a30f8f8cSSatish Balay restore the original flags */ 802a30f8f8cSSatish Balay r1 = baij->roworiented; 803a30f8f8cSSatish Balay r2 = a->roworiented; 80491c97fd4SSatish Balay r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 80526fbe8dcSKarl Rupp 806a30f8f8cSSatish Balay baij->roworiented = PETSC_FALSE; 807a30f8f8cSSatish Balay a->roworiented = PETSC_FALSE; 80826fbe8dcSKarl Rupp 80991c97fd4SSatish Balay ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 810a30f8f8cSSatish Balay while (1) { 811a30f8f8cSSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 812a30f8f8cSSatish Balay if (!flg) break; 813a30f8f8cSSatish Balay 814a30f8f8cSSatish Balay for (i=0; i<n;) { 815a30f8f8cSSatish Balay /* Now identify the consecutive vals belonging to the same row */ 81626fbe8dcSKarl Rupp for (j=i,rstart=row[j]; j<n; j++) { 81726fbe8dcSKarl Rupp if (row[j] != rstart) break; 81826fbe8dcSKarl Rupp } 819a30f8f8cSSatish Balay if (j < n) ncols = j-i; 820a30f8f8cSSatish Balay else ncols = n-i; 8214b4eb8d3SJed Brown ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr); 822a30f8f8cSSatish Balay i = j; 823a30f8f8cSSatish Balay } 824a30f8f8cSSatish Balay } 825a30f8f8cSSatish Balay ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 82626fbe8dcSKarl Rupp 827a30f8f8cSSatish Balay baij->roworiented = r1; 828a30f8f8cSSatish Balay a->roworiented = r2; 82926fbe8dcSKarl Rupp 83091c97fd4SSatish Balay ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 831a30f8f8cSSatish Balay } 832a30f8f8cSSatish Balay 833a30f8f8cSSatish Balay ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 834a30f8f8cSSatish Balay ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 835a30f8f8cSSatish Balay 836a30f8f8cSSatish Balay /* determine if any processor has disassembled, if so we must 837a30f8f8cSSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 838a30f8f8cSSatish Balay /* 839a30f8f8cSSatish Balay if nonzero structure of submatrix B cannot change then we know that 840a30f8f8cSSatish Balay no processor disassembled thus we can skip this stuff 841a30f8f8cSSatish Balay */ 842a30f8f8cSSatish Balay if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 843b2566f29SBarry Smith ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 844a30f8f8cSSatish Balay if (mat->was_assembled && !other_disassembled) { 845ab9863d7SBarry Smith ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 846a30f8f8cSSatish Balay } 847a30f8f8cSSatish Balay } 848a30f8f8cSSatish Balay 849a30f8f8cSSatish Balay if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 85040781036SHong Zhang ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 851a30f8f8cSSatish Balay } 852a30f8f8cSSatish Balay ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 853a30f8f8cSSatish Balay ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 854a30f8f8cSSatish Balay 85574ed9c26SBarry Smith ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 85626fbe8dcSKarl Rupp 857a30f8f8cSSatish Balay baij->rowvalues = 0; 8584f9cfa9eSBarry Smith 8594f9cfa9eSBarry Smith /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 8604f9cfa9eSBarry Smith if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 861e56f5c9eSBarry Smith PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 862b2566f29SBarry Smith ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 863e56f5c9eSBarry Smith } 864a30f8f8cSSatish Balay PetscFunctionReturn(0); 865a30f8f8cSSatish Balay } 866a30f8f8cSSatish Balay 867dd6ea824SBarry Smith extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 8689804daf3SBarry Smith #include <petscdraw.h> 8696849ba73SBarry Smith static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 870a30f8f8cSSatish Balay { 871a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 872dfbe8321SBarry Smith PetscErrorCode ierr; 873d0f46423SBarry Smith PetscInt bs = mat->rmap->bs; 8747da1fb6eSBarry Smith PetscMPIInt rank = baij->rank; 875ace3abfcSBarry Smith PetscBool iascii,isdraw; 876b0a32e0cSBarry Smith PetscViewer sviewer; 877f3ef73ceSBarry Smith PetscViewerFormat format; 878a30f8f8cSSatish Balay 879a30f8f8cSSatish Balay PetscFunctionBegin; 880251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 881251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 88232077d6dSBarry Smith if (iascii) { 883b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 884456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 885a30f8f8cSSatish Balay MatInfo info; 886ce94432eSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 887a30f8f8cSSatish Balay ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 8881575c14dSBarry Smith ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 889b1e9c6f1SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr); 890a30f8f8cSSatish Balay ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 891e6dd01d4SJed Brown ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 892a30f8f8cSSatish Balay ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 893e6dd01d4SJed Brown ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 894b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 8951575c14dSBarry Smith ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 89607d81ca4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 897a30f8f8cSSatish Balay ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 898a30f8f8cSSatish Balay PetscFunctionReturn(0); 899fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_INFO) { 90077431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 901a30f8f8cSSatish Balay PetscFunctionReturn(0); 902c1490034SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 903c1490034SHong Zhang PetscFunctionReturn(0); 904a30f8f8cSSatish Balay } 905a30f8f8cSSatish Balay } 906a30f8f8cSSatish Balay 907a30f8f8cSSatish Balay if (isdraw) { 908b0a32e0cSBarry Smith PetscDraw draw; 909ace3abfcSBarry Smith PetscBool isnull; 910b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 91145f3bb6eSLisandro Dalcin ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 91245f3bb6eSLisandro Dalcin if (isnull) PetscFunctionReturn(0); 913a30f8f8cSSatish Balay } 914a30f8f8cSSatish Balay 9157da1fb6eSBarry Smith { 916a30f8f8cSSatish Balay /* assemble the entire matrix onto first processor. */ 917a30f8f8cSSatish Balay Mat A; 91865d70643SHong Zhang Mat_SeqSBAIJ *Aloc; 91965d70643SHong Zhang Mat_SeqBAIJ *Bloc; 920d0f46423SBarry Smith PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 921a30f8f8cSSatish Balay MatScalar *a; 9223e219373SBarry Smith const char *matname; 923a30f8f8cSSatish Balay 924f204ca49SKris Buschelman /* Should this be the same type as mat? */ 925ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 926a30f8f8cSSatish Balay if (!rank) { 927f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 928a30f8f8cSSatish Balay } else { 929f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 930a30f8f8cSSatish Balay } 931f204ca49SKris Buschelman ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 9320298fd71SBarry Smith ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 9332b82e772SSatish Balay ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 9343bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 935a30f8f8cSSatish Balay 936a30f8f8cSSatish Balay /* copy over the A part */ 93765d70643SHong Zhang Aloc = (Mat_SeqSBAIJ*)baij->A->data; 938a30f8f8cSSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 939785e854fSJed Brown ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 940a30f8f8cSSatish Balay 941a30f8f8cSSatish Balay for (i=0; i<mbs; i++) { 942e9f7bc9eSHong Zhang rvals[0] = bs*(baij->rstartbs + i); 94326fbe8dcSKarl Rupp for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 944a30f8f8cSSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 945e9f7bc9eSHong Zhang col = (baij->cstartbs+aj[j])*bs; 946a30f8f8cSSatish Balay for (k=0; k<bs; k++) { 947dd6ea824SBarry Smith ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 94826fbe8dcSKarl Rupp col++; 94926fbe8dcSKarl Rupp a += bs; 950a30f8f8cSSatish Balay } 951a30f8f8cSSatish Balay } 952a30f8f8cSSatish Balay } 953a30f8f8cSSatish Balay /* copy over the B part */ 95465d70643SHong Zhang Bloc = (Mat_SeqBAIJ*)baij->B->data; 95565d70643SHong Zhang ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 956a30f8f8cSSatish Balay for (i=0; i<mbs; i++) { 957e9f7bc9eSHong Zhang 958e9f7bc9eSHong Zhang rvals[0] = bs*(baij->rstartbs + i); 95926fbe8dcSKarl Rupp for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 960a30f8f8cSSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 961a30f8f8cSSatish Balay col = baij->garray[aj[j]]*bs; 962a30f8f8cSSatish Balay for (k=0; k<bs; k++) { 963799bb49cSHong Zhang ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 96426fbe8dcSKarl Rupp col++; 96526fbe8dcSKarl Rupp a += bs; 966a30f8f8cSSatish Balay } 967a30f8f8cSSatish Balay } 968a30f8f8cSSatish Balay } 969a30f8f8cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 970a30f8f8cSSatish Balay ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 971a30f8f8cSSatish Balay ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 972a30f8f8cSSatish Balay /* 973a30f8f8cSSatish Balay Everyone has to call to draw the matrix since the graphics waits are 974b0a32e0cSBarry Smith synchronized across all processors that share the PetscDraw object 975a30f8f8cSSatish Balay */ 9763f08860eSBarry Smith ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 977ade3a672SBarry Smith ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 9783e219373SBarry Smith if (!rank) { 979ade3a672SBarry Smith ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 980383922c3SLisandro Dalcin ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 981a30f8f8cSSatish Balay } 9823f08860eSBarry Smith ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 9831575c14dSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 9846bf464f9SBarry Smith ierr = MatDestroy(&A);CHKERRQ(ierr); 985a30f8f8cSSatish Balay } 986a30f8f8cSSatish Balay PetscFunctionReturn(0); 987a30f8f8cSSatish Balay } 988a30f8f8cSSatish Balay 989*618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */ 990*618cc2edSLisandro Dalcin #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary 991d1654148SHong Zhang 992dfbe8321SBarry Smith PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 993a30f8f8cSSatish Balay { 994dfbe8321SBarry Smith PetscErrorCode ierr; 995ace3abfcSBarry Smith PetscBool iascii,isdraw,issocket,isbinary; 996a30f8f8cSSatish Balay 997a30f8f8cSSatish Balay PetscFunctionBegin; 998251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 999251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1000251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1001251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1002d1654148SHong Zhang if (iascii || isdraw || issocket) { 1003a30f8f8cSSatish Balay ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1004d1654148SHong Zhang } else if (isbinary) { 1005d1654148SHong Zhang ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1006a30f8f8cSSatish Balay } 1007a30f8f8cSSatish Balay PetscFunctionReturn(0); 1008a30f8f8cSSatish Balay } 1009a30f8f8cSSatish Balay 1010dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 1011a30f8f8cSSatish Balay { 1012a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1013dfbe8321SBarry Smith PetscErrorCode ierr; 1014a30f8f8cSSatish Balay 1015a30f8f8cSSatish Balay PetscFunctionBegin; 1016a30f8f8cSSatish Balay #if defined(PETSC_USE_LOG) 1017d0f46423SBarry Smith PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1018a30f8f8cSSatish Balay #endif 1019a30f8f8cSSatish Balay ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1020a30f8f8cSSatish Balay ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 10216bf464f9SBarry Smith ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 10226bf464f9SBarry Smith ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1023a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE) 10246bc0bbbfSBarry Smith ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1025a30f8f8cSSatish Balay #else 102605b42c5fSBarry Smith ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1027a30f8f8cSSatish Balay #endif 102805b42c5fSBarry Smith ierr = PetscFree(baij->garray);CHKERRQ(ierr); 10296bf464f9SBarry Smith ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 10306bf464f9SBarry Smith ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 10316bf464f9SBarry Smith ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 10326bf464f9SBarry Smith ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 10336bf464f9SBarry Smith ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 10346bf464f9SBarry Smith ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 10356bf464f9SBarry Smith ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 10366bf464f9SBarry Smith ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 10375755ff91SHong Zhang ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 103805b42c5fSBarry Smith ierr = PetscFree(baij->barray);CHKERRQ(ierr); 103905b42c5fSBarry Smith ierr = PetscFree(baij->hd);CHKERRQ(ierr); 10406bf464f9SBarry Smith ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 10416bf464f9SBarry Smith ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 10426bf464f9SBarry Smith ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 1043ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE) 104405b42c5fSBarry Smith ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 1045a30f8f8cSSatish Balay #endif 104659ffdab8SBarry Smith ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 104759ffdab8SBarry Smith ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 1048899cda47SBarry Smith ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1049bf0cc555SLisandro Dalcin ierr = PetscFree(mat->data);CHKERRQ(ierr); 1050901853e0SKris Buschelman 1051dbd8c25aSHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1052bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1053bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1054bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1055d2c30c80SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 10566214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 10576214f412SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr); 10586214f412SHong Zhang #endif 1059b147fbf3SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);CHKERRQ(ierr); 1060b147fbf3SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);CHKERRQ(ierr); 1061a30f8f8cSSatish Balay PetscFunctionReturn(0); 1062a30f8f8cSSatish Balay } 1063a30f8f8cSSatish Balay 1064547795f9SHong Zhang PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 1065547795f9SHong Zhang { 1066547795f9SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1067547795f9SHong Zhang PetscErrorCode ierr; 1068eb1ec7c1SStefano Zampini PetscInt mbs=a->mbs,bs=A->rmap->bs; 10696de40e93SBarry Smith PetscScalar *from; 10706de40e93SBarry Smith const PetscScalar *x; 1071547795f9SHong Zhang 1072547795f9SHong Zhang PetscFunctionBegin; 1073547795f9SHong Zhang /* diagonal part */ 1074547795f9SHong Zhang ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1075547795f9SHong Zhang ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1076547795f9SHong Zhang 1077547795f9SHong Zhang /* subdiagonal part */ 1078547795f9SHong Zhang ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1079547795f9SHong Zhang 1080547795f9SHong Zhang /* copy x into the vec slvec0 */ 1081547795f9SHong Zhang ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 10826de40e93SBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1083547795f9SHong Zhang 1084580bdb30SBarry Smith ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1085547795f9SHong Zhang ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 10866de40e93SBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1087547795f9SHong Zhang 1088547795f9SHong Zhang ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1089547795f9SHong Zhang ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1090547795f9SHong Zhang /* supperdiagonal part */ 1091547795f9SHong Zhang ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1092547795f9SHong Zhang PetscFunctionReturn(0); 1093547795f9SHong Zhang } 1094547795f9SHong Zhang 1095dfbe8321SBarry Smith PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 1096a9d4b620SHong Zhang { 1097a9d4b620SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1098dfbe8321SBarry Smith PetscErrorCode ierr; 1099eb1ec7c1SStefano Zampini PetscInt mbs=a->mbs,bs=A->rmap->bs; 1100d9ca1df4SBarry Smith PetscScalar *from; 1101d9ca1df4SBarry Smith const PetscScalar *x; 1102a9d4b620SHong Zhang 1103a9d4b620SHong Zhang PetscFunctionBegin; 1104a9d4b620SHong Zhang /* diagonal part */ 1105a9d4b620SHong Zhang ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1106fa22f6d0SBarry Smith ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1107a9d4b620SHong Zhang 1108a9d4b620SHong Zhang /* subdiagonal part */ 1109a9d4b620SHong Zhang ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1110fc165ae2SBarry Smith 1111a9d4b620SHong Zhang /* copy x into the vec slvec0 */ 11121ebc52fbSHong Zhang ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1113d9ca1df4SBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1114a9d4b620SHong Zhang 1115580bdb30SBarry Smith ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1116fc165ae2SBarry Smith ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1117d9ca1df4SBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1118fc165ae2SBarry Smith 1119fc165ae2SBarry Smith ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1120ca9f406cSSatish Balay ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1121a9d4b620SHong Zhang /* supperdiagonal part */ 1122a9d4b620SHong Zhang ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1123a9d4b620SHong Zhang PetscFunctionReturn(0); 1124a9d4b620SHong Zhang } 1125a9d4b620SHong Zhang 1126dfbe8321SBarry Smith PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 1127a30f8f8cSSatish Balay { 1128a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1129dfbe8321SBarry Smith PetscErrorCode ierr; 11301302d50aSBarry Smith PetscInt nt; 1131a30f8f8cSSatish Balay 1132a30f8f8cSSatish Balay PetscFunctionBegin; 1133a30f8f8cSSatish Balay ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1134e7e72b3dSBarry Smith if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1135e7e72b3dSBarry Smith 1136a30f8f8cSSatish Balay ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1137e7e72b3dSBarry Smith if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 113865d70643SHong Zhang 1139ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1140b941877fSHong Zhang /* do diagonal part */ 1141b941877fSHong Zhang ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1142b941877fSHong Zhang /* do supperdiagonal part */ 1143ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1144b941877fSHong Zhang ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1145b941877fSHong Zhang /* do subdiagonal part */ 1146b941877fSHong Zhang ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1147ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1148ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1149a30f8f8cSSatish Balay PetscFunctionReturn(0); 1150a30f8f8cSSatish Balay } 1151a30f8f8cSSatish Balay 1152eb1ec7c1SStefano Zampini PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz) 1153eb1ec7c1SStefano Zampini { 1154eb1ec7c1SStefano Zampini Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1155eb1ec7c1SStefano Zampini PetscErrorCode ierr; 1156eb1ec7c1SStefano Zampini PetscInt mbs=a->mbs,bs=A->rmap->bs; 1157eb1ec7c1SStefano Zampini PetscScalar *from,zero=0.0; 1158eb1ec7c1SStefano Zampini const PetscScalar *x; 1159eb1ec7c1SStefano Zampini 1160eb1ec7c1SStefano Zampini PetscFunctionBegin; 1161eb1ec7c1SStefano Zampini /* diagonal part */ 1162eb1ec7c1SStefano Zampini ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1163eb1ec7c1SStefano Zampini ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 1164eb1ec7c1SStefano Zampini 1165eb1ec7c1SStefano Zampini /* subdiagonal part */ 1166eb1ec7c1SStefano Zampini ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1167eb1ec7c1SStefano Zampini 1168eb1ec7c1SStefano Zampini /* copy x into the vec slvec0 */ 1169eb1ec7c1SStefano Zampini ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1170eb1ec7c1SStefano Zampini ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1171eb1ec7c1SStefano Zampini ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1172eb1ec7c1SStefano Zampini ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1173eb1ec7c1SStefano Zampini 1174eb1ec7c1SStefano Zampini ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1175eb1ec7c1SStefano Zampini ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1176eb1ec7c1SStefano Zampini ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1177eb1ec7c1SStefano Zampini 1178eb1ec7c1SStefano Zampini /* supperdiagonal part */ 1179eb1ec7c1SStefano Zampini ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1180eb1ec7c1SStefano Zampini PetscFunctionReturn(0); 1181eb1ec7c1SStefano Zampini } 1182eb1ec7c1SStefano Zampini 1183dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1184a30f8f8cSSatish Balay { 1185de8b6608SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1186dfbe8321SBarry Smith PetscErrorCode ierr; 1187d0f46423SBarry Smith PetscInt mbs=a->mbs,bs=A->rmap->bs; 1188d9ca1df4SBarry Smith PetscScalar *from,zero=0.0; 1189d9ca1df4SBarry Smith const PetscScalar *x; 1190a9d4b620SHong Zhang 1191a9d4b620SHong Zhang PetscFunctionBegin; 1192a9d4b620SHong Zhang /* diagonal part */ 1193a9d4b620SHong Zhang ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 11942dcb1b2aSMatthew Knepley ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 1195a9d4b620SHong Zhang 1196a9d4b620SHong Zhang /* subdiagonal part */ 1197a9d4b620SHong Zhang ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1198a9d4b620SHong Zhang 1199a9d4b620SHong Zhang /* copy x into the vec slvec0 */ 12001ebc52fbSHong Zhang ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1201d9ca1df4SBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1202580bdb30SBarry Smith ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 12031ebc52fbSHong Zhang ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1204a9d4b620SHong Zhang 1205ca9f406cSSatish Balay ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1206d9ca1df4SBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1207ca9f406cSSatish Balay ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1208a9d4b620SHong Zhang 1209a9d4b620SHong Zhang /* supperdiagonal part */ 1210a9d4b620SHong Zhang ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1211a9d4b620SHong Zhang PetscFunctionReturn(0); 1212a9d4b620SHong Zhang } 1213a9d4b620SHong Zhang 1214dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1215a9d4b620SHong Zhang { 1216a9d4b620SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1217dfbe8321SBarry Smith PetscErrorCode ierr; 1218a30f8f8cSSatish Balay 1219a30f8f8cSSatish Balay PetscFunctionBegin; 1220ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1221b941877fSHong Zhang /* do diagonal part */ 1222b941877fSHong Zhang ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1223b941877fSHong Zhang /* do supperdiagonal part */ 1224ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1225de8b6608SHong Zhang ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1226de8b6608SHong Zhang 1227b941877fSHong Zhang /* do subdiagonal part */ 1228a30f8f8cSSatish Balay ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1229ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1230ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1231a30f8f8cSSatish Balay PetscFunctionReturn(0); 1232a30f8f8cSSatish Balay } 1233a30f8f8cSSatish Balay 1234a30f8f8cSSatish Balay /* 1235a30f8f8cSSatish Balay This only works correctly for square matrices where the subblock A->A is the 1236a30f8f8cSSatish Balay diagonal block 1237a30f8f8cSSatish Balay */ 1238dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1239a30f8f8cSSatish Balay { 1240a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1241dfbe8321SBarry Smith PetscErrorCode ierr; 1242a30f8f8cSSatish Balay 1243a30f8f8cSSatish Balay PetscFunctionBegin; 1244e32f2f54SBarry Smith /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1245a30f8f8cSSatish Balay ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1246a30f8f8cSSatish Balay PetscFunctionReturn(0); 1247a30f8f8cSSatish Balay } 1248a30f8f8cSSatish Balay 1249f4df32b1SMatthew Knepley PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1250a30f8f8cSSatish Balay { 1251a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1252dfbe8321SBarry Smith PetscErrorCode ierr; 1253a30f8f8cSSatish Balay 1254a30f8f8cSSatish Balay PetscFunctionBegin; 1255f4df32b1SMatthew Knepley ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1256f4df32b1SMatthew Knepley ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1257a30f8f8cSSatish Balay PetscFunctionReturn(0); 1258a30f8f8cSSatish Balay } 1259a30f8f8cSSatish Balay 12601302d50aSBarry Smith PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1261a30f8f8cSSatish Balay { 1262d0d4cfc2SHong Zhang Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1263d0d4cfc2SHong Zhang PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1264d0d4cfc2SHong Zhang PetscErrorCode ierr; 1265d0f46423SBarry Smith PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1266d0f46423SBarry Smith PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1267899cda47SBarry Smith PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1268d0d4cfc2SHong Zhang 1269a30f8f8cSSatish Balay PetscFunctionBegin; 1270e32f2f54SBarry Smith if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1271d0d4cfc2SHong Zhang mat->getrowactive = PETSC_TRUE; 1272d0d4cfc2SHong Zhang 1273d0d4cfc2SHong Zhang if (!mat->rowvalues && (idx || v)) { 1274d0d4cfc2SHong Zhang /* 1275d0d4cfc2SHong Zhang allocate enough space to hold information from the longest row. 1276d0d4cfc2SHong Zhang */ 1277d0d4cfc2SHong Zhang Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1278d0d4cfc2SHong Zhang Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1279d0d4cfc2SHong Zhang PetscInt max = 1,mbs = mat->mbs,tmp; 1280d0d4cfc2SHong Zhang for (i=0; i<mbs; i++) { 1281d0d4cfc2SHong Zhang tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 128226fbe8dcSKarl Rupp if (max < tmp) max = tmp; 1283d0d4cfc2SHong Zhang } 1284dcca6d9dSJed Brown ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1285d0d4cfc2SHong Zhang } 1286d0d4cfc2SHong Zhang 1287e7e72b3dSBarry Smith if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1288d0d4cfc2SHong Zhang lrow = row - brstart; /* local row index */ 1289d0d4cfc2SHong Zhang 1290d0d4cfc2SHong Zhang pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1291d0d4cfc2SHong Zhang if (!v) {pvA = 0; pvB = 0;} 1292d0d4cfc2SHong Zhang if (!idx) {pcA = 0; if (!v) pcB = 0;} 1293d0d4cfc2SHong Zhang ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1294d0d4cfc2SHong Zhang ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1295d0d4cfc2SHong Zhang nztot = nzA + nzB; 1296d0d4cfc2SHong Zhang 1297d0d4cfc2SHong Zhang cmap = mat->garray; 1298d0d4cfc2SHong Zhang if (v || idx) { 1299d0d4cfc2SHong Zhang if (nztot) { 1300d0d4cfc2SHong Zhang /* Sort by increasing column numbers, assuming A and B already sorted */ 1301d0d4cfc2SHong Zhang PetscInt imark = -1; 1302d0d4cfc2SHong Zhang if (v) { 1303d0d4cfc2SHong Zhang *v = v_p = mat->rowvalues; 1304d0d4cfc2SHong Zhang for (i=0; i<nzB; i++) { 1305d0d4cfc2SHong Zhang if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1306d0d4cfc2SHong Zhang else break; 1307d0d4cfc2SHong Zhang } 1308d0d4cfc2SHong Zhang imark = i; 1309d0d4cfc2SHong Zhang for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1310d0d4cfc2SHong Zhang for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1311d0d4cfc2SHong Zhang } 1312d0d4cfc2SHong Zhang if (idx) { 1313d0d4cfc2SHong Zhang *idx = idx_p = mat->rowindices; 1314d0d4cfc2SHong Zhang if (imark > -1) { 1315d0d4cfc2SHong Zhang for (i=0; i<imark; i++) { 1316d0d4cfc2SHong Zhang idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1317d0d4cfc2SHong Zhang } 1318d0d4cfc2SHong Zhang } else { 1319d0d4cfc2SHong Zhang for (i=0; i<nzB; i++) { 132026fbe8dcSKarl Rupp if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1321d0d4cfc2SHong Zhang else break; 1322d0d4cfc2SHong Zhang } 1323d0d4cfc2SHong Zhang imark = i; 1324d0d4cfc2SHong Zhang } 1325d0d4cfc2SHong Zhang for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1326d0d4cfc2SHong Zhang for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1327d0d4cfc2SHong Zhang } 1328d0d4cfc2SHong Zhang } else { 1329d0d4cfc2SHong Zhang if (idx) *idx = 0; 1330d0d4cfc2SHong Zhang if (v) *v = 0; 1331d0d4cfc2SHong Zhang } 1332d0d4cfc2SHong Zhang } 1333d0d4cfc2SHong Zhang *nz = nztot; 1334d0d4cfc2SHong Zhang ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1335d0d4cfc2SHong Zhang ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1336a30f8f8cSSatish Balay PetscFunctionReturn(0); 1337a30f8f8cSSatish Balay } 1338a30f8f8cSSatish Balay 13391302d50aSBarry Smith PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1340a30f8f8cSSatish Balay { 1341a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1342a30f8f8cSSatish Balay 1343a30f8f8cSSatish Balay PetscFunctionBegin; 1344e7e72b3dSBarry Smith if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1345a30f8f8cSSatish Balay baij->getrowactive = PETSC_FALSE; 1346a30f8f8cSSatish Balay PetscFunctionReturn(0); 1347a30f8f8cSSatish Balay } 1348a30f8f8cSSatish Balay 1349d0d4cfc2SHong Zhang PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1350d0d4cfc2SHong Zhang { 1351d0d4cfc2SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1352d0d4cfc2SHong Zhang Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1353d0d4cfc2SHong Zhang 1354d0d4cfc2SHong Zhang PetscFunctionBegin; 1355d0d4cfc2SHong Zhang aA->getrow_utriangular = PETSC_TRUE; 1356d0d4cfc2SHong Zhang PetscFunctionReturn(0); 1357d0d4cfc2SHong Zhang } 1358d0d4cfc2SHong Zhang PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1359d0d4cfc2SHong Zhang { 1360d0d4cfc2SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1361d0d4cfc2SHong Zhang Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1362d0d4cfc2SHong Zhang 1363d0d4cfc2SHong Zhang PetscFunctionBegin; 1364d0d4cfc2SHong Zhang aA->getrow_utriangular = PETSC_FALSE; 1365d0d4cfc2SHong Zhang PetscFunctionReturn(0); 1366d0d4cfc2SHong Zhang } 1367d0d4cfc2SHong Zhang 136899cafbc1SBarry Smith PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 136999cafbc1SBarry Smith { 137099cafbc1SBarry Smith Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 137199cafbc1SBarry Smith PetscErrorCode ierr; 137299cafbc1SBarry Smith 137399cafbc1SBarry Smith PetscFunctionBegin; 137499cafbc1SBarry Smith ierr = MatRealPart(a->A);CHKERRQ(ierr); 137599cafbc1SBarry Smith ierr = MatRealPart(a->B);CHKERRQ(ierr); 137699cafbc1SBarry Smith PetscFunctionReturn(0); 137799cafbc1SBarry Smith } 137899cafbc1SBarry Smith 137999cafbc1SBarry Smith PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 138099cafbc1SBarry Smith { 138199cafbc1SBarry Smith Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 138299cafbc1SBarry Smith PetscErrorCode ierr; 138399cafbc1SBarry Smith 138499cafbc1SBarry Smith PetscFunctionBegin; 138599cafbc1SBarry Smith ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 138699cafbc1SBarry Smith ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 138799cafbc1SBarry Smith PetscFunctionReturn(0); 138899cafbc1SBarry Smith } 138999cafbc1SBarry Smith 13907dae84e0SHong Zhang /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ() 139136032a97SHong Zhang Input: isrow - distributed(parallel), 139236032a97SHong Zhang iscol_local - locally owned (seq) 139336032a97SHong Zhang */ 139436032a97SHong Zhang PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg) 13958f46ffcaSHong Zhang { 13968f46ffcaSHong Zhang PetscErrorCode ierr; 13978f46ffcaSHong Zhang PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch; 13988f46ffcaSHong Zhang const PetscInt *ptr1,*ptr2; 139936032a97SHong Zhang 140036032a97SHong Zhang PetscFunctionBegin; 14018f46ffcaSHong Zhang ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr); 14028f46ffcaSHong Zhang ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr); 14031098a8e8SHong Zhang if (sz1 > sz2) { 14041098a8e8SHong Zhang *flg = PETSC_FALSE; 14051098a8e8SHong Zhang PetscFunctionReturn(0); 14061098a8e8SHong Zhang } 14078f46ffcaSHong Zhang 14088f46ffcaSHong Zhang ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr); 14098f46ffcaSHong Zhang ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr); 14108f46ffcaSHong Zhang 14118f46ffcaSHong Zhang ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr); 14128f46ffcaSHong Zhang ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr); 1413580bdb30SBarry Smith ierr = PetscArraycpy(a1,ptr1,sz1);CHKERRQ(ierr); 1414580bdb30SBarry Smith ierr = PetscArraycpy(a2,ptr2,sz2);CHKERRQ(ierr); 14158f46ffcaSHong Zhang ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr); 14168f46ffcaSHong Zhang ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr); 14178f46ffcaSHong Zhang 14188f46ffcaSHong Zhang nmatch=0; 14198f46ffcaSHong Zhang k = 0; 14208f46ffcaSHong Zhang for (i=0; i<sz1; i++){ 14218f46ffcaSHong Zhang for (j=k; j<sz2; j++){ 14228f46ffcaSHong Zhang if (a1[i] == a2[j]) { 14238f46ffcaSHong Zhang k = j; nmatch++; 14248f46ffcaSHong Zhang break; 14258f46ffcaSHong Zhang } 14268f46ffcaSHong Zhang } 14278f46ffcaSHong Zhang } 14288f46ffcaSHong Zhang ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr); 14298f46ffcaSHong Zhang ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr); 14308f46ffcaSHong Zhang ierr = PetscFree(a1);CHKERRQ(ierr); 14318f46ffcaSHong Zhang ierr = PetscFree(a2);CHKERRQ(ierr); 14321098a8e8SHong Zhang if (nmatch < sz1) { 14331098a8e8SHong Zhang *flg = PETSC_FALSE; 14341098a8e8SHong Zhang } else { 14351098a8e8SHong Zhang *flg = PETSC_TRUE; 14361098a8e8SHong Zhang } 143736032a97SHong Zhang PetscFunctionReturn(0); 14388f46ffcaSHong Zhang } 143936032a97SHong Zhang 14407dae84e0SHong Zhang PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 144136032a97SHong Zhang { 144236032a97SHong Zhang PetscErrorCode ierr; 144336032a97SHong Zhang IS iscol_local; 144436032a97SHong Zhang PetscInt csize; 144536032a97SHong Zhang PetscBool isequal; 144636032a97SHong Zhang 144736032a97SHong Zhang PetscFunctionBegin; 144836032a97SHong Zhang ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 144936032a97SHong Zhang if (call == MAT_REUSE_MATRIX) { 145036032a97SHong Zhang ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 145136032a97SHong Zhang if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 145236032a97SHong Zhang } else { 145336032a97SHong Zhang ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 145436032a97SHong Zhang ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr); 145536032a97SHong Zhang if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow"); 14568f46ffcaSHong Zhang } 14578f46ffcaSHong Zhang 14587dae84e0SHong Zhang /* now call MatCreateSubMatrix_MPIBAIJ() */ 14597dae84e0SHong Zhang ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 14608f46ffcaSHong Zhang if (call == MAT_INITIAL_MATRIX) { 14618f46ffcaSHong Zhang ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 14628f46ffcaSHong Zhang ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 14638f46ffcaSHong Zhang } 14648f46ffcaSHong Zhang PetscFunctionReturn(0); 14658f46ffcaSHong Zhang } 14668f46ffcaSHong Zhang 1467dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1468a30f8f8cSSatish Balay { 1469a30f8f8cSSatish Balay Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1470dfbe8321SBarry Smith PetscErrorCode ierr; 1471a30f8f8cSSatish Balay 1472a30f8f8cSSatish Balay PetscFunctionBegin; 1473a30f8f8cSSatish Balay ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1474a30f8f8cSSatish Balay ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1475a30f8f8cSSatish Balay PetscFunctionReturn(0); 1476a30f8f8cSSatish Balay } 1477a30f8f8cSSatish Balay 1478dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1479a30f8f8cSSatish Balay { 1480a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1481a30f8f8cSSatish Balay Mat A = a->A,B = a->B; 1482dfbe8321SBarry Smith PetscErrorCode ierr; 14833966268fSBarry Smith PetscLogDouble isend[5],irecv[5]; 1484a30f8f8cSSatish Balay 1485a30f8f8cSSatish Balay PetscFunctionBegin; 1486d0f46423SBarry Smith info->block_size = (PetscReal)matin->rmap->bs; 148726fbe8dcSKarl Rupp 1488a30f8f8cSSatish Balay ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 148926fbe8dcSKarl Rupp 1490a30f8f8cSSatish Balay isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1491a30f8f8cSSatish Balay isend[3] = info->memory; isend[4] = info->mallocs; 149226fbe8dcSKarl Rupp 1493a30f8f8cSSatish Balay ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 149426fbe8dcSKarl Rupp 1495a30f8f8cSSatish Balay isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1496a30f8f8cSSatish Balay isend[3] += info->memory; isend[4] += info->mallocs; 1497a30f8f8cSSatish Balay if (flag == MAT_LOCAL) { 1498a30f8f8cSSatish Balay info->nz_used = isend[0]; 1499a30f8f8cSSatish Balay info->nz_allocated = isend[1]; 1500a30f8f8cSSatish Balay info->nz_unneeded = isend[2]; 1501a30f8f8cSSatish Balay info->memory = isend[3]; 1502a30f8f8cSSatish Balay info->mallocs = isend[4]; 1503a30f8f8cSSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 15043966268fSBarry Smith ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 150526fbe8dcSKarl Rupp 1506a30f8f8cSSatish Balay info->nz_used = irecv[0]; 1507a30f8f8cSSatish Balay info->nz_allocated = irecv[1]; 1508a30f8f8cSSatish Balay info->nz_unneeded = irecv[2]; 1509a30f8f8cSSatish Balay info->memory = irecv[3]; 1510a30f8f8cSSatish Balay info->mallocs = irecv[4]; 1511a30f8f8cSSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 15123966268fSBarry Smith ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 151326fbe8dcSKarl Rupp 1514a30f8f8cSSatish Balay info->nz_used = irecv[0]; 1515a30f8f8cSSatish Balay info->nz_allocated = irecv[1]; 1516a30f8f8cSSatish Balay info->nz_unneeded = irecv[2]; 1517a30f8f8cSSatish Balay info->memory = irecv[3]; 1518a30f8f8cSSatish Balay info->mallocs = irecv[4]; 1519f23aa3ddSBarry Smith } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1520a30f8f8cSSatish Balay info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1521a30f8f8cSSatish Balay info->fill_ratio_needed = 0; 1522a30f8f8cSSatish Balay info->factor_mallocs = 0; 1523a30f8f8cSSatish Balay PetscFunctionReturn(0); 1524a30f8f8cSSatish Balay } 1525a30f8f8cSSatish Balay 1526ace3abfcSBarry Smith PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1527a30f8f8cSSatish Balay { 1528a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1529d0d4cfc2SHong Zhang Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1530dfbe8321SBarry Smith PetscErrorCode ierr; 1531a30f8f8cSSatish Balay 1532a30f8f8cSSatish Balay PetscFunctionBegin; 1533e98b92d7SKris Buschelman switch (op) { 1534512a5fc5SBarry Smith case MAT_NEW_NONZERO_LOCATIONS: 1535e98b92d7SKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 153628b2fa4aSMatthew Knepley case MAT_UNUSED_NONZERO_LOCATION_ERR: 1537a9817697SBarry Smith case MAT_KEEP_NONZERO_PATTERN: 1538c10200c1SHong Zhang case MAT_SUBMAT_SINGLEIS: 1539e98b92d7SKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 154043674050SBarry Smith MatCheckPreallocated(A,1); 15414e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 15424e0d8c25SBarry Smith ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1543e98b92d7SKris Buschelman break; 1544e98b92d7SKris Buschelman case MAT_ROW_ORIENTED: 154543674050SBarry Smith MatCheckPreallocated(A,1); 15464e0d8c25SBarry Smith a->roworiented = flg; 154726fbe8dcSKarl Rupp 15484e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 15494e0d8c25SBarry Smith ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1550e98b92d7SKris Buschelman break; 15514e0d8c25SBarry Smith case MAT_NEW_DIAGONALS: 1552071fcb05SBarry Smith case MAT_SORTED_FULL: 1553290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1554e98b92d7SKris Buschelman break; 1555e98b92d7SKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 15564e0d8c25SBarry Smith a->donotstash = flg; 1557e98b92d7SKris Buschelman break; 1558e98b92d7SKris Buschelman case MAT_USE_HASH_TABLE: 15594e0d8c25SBarry Smith a->ht_flag = flg; 1560e98b92d7SKris Buschelman break; 15619a4540c5SBarry Smith case MAT_HERMITIAN: 156243674050SBarry Smith MatCheckPreallocated(A,1); 1563eeffb40dSHong Zhang ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 15640f2140c7SStefano Zampini #if defined(PETSC_USE_COMPLEX) 1565eb1ec7c1SStefano Zampini if (flg) { /* need different mat-vec ops */ 1566547795f9SHong Zhang A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1567eb1ec7c1SStefano Zampini A->ops->multadd = MatMultAdd_MPISBAIJ_Hermitian; 1568eb1ec7c1SStefano Zampini A->ops->multtranspose = NULL; 1569eb1ec7c1SStefano Zampini A->ops->multtransposeadd = NULL; 1570eb1ec7c1SStefano Zampini A->symmetric = PETSC_FALSE; 1571eb1ec7c1SStefano Zampini } 15720f2140c7SStefano Zampini #endif 1573eeffb40dSHong Zhang break; 1574ffa07934SHong Zhang case MAT_SPD: 157577e54ba9SKris Buschelman case MAT_SYMMETRIC: 157643674050SBarry Smith MatCheckPreallocated(A,1); 1577eeffb40dSHong Zhang ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1578eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX) 1579eb1ec7c1SStefano Zampini if (flg) { /* restore to use default mat-vec ops */ 1580eb1ec7c1SStefano Zampini A->ops->mult = MatMult_MPISBAIJ; 1581eb1ec7c1SStefano Zampini A->ops->multadd = MatMultAdd_MPISBAIJ; 1582eb1ec7c1SStefano Zampini A->ops->multtranspose = MatMult_MPISBAIJ; 1583eb1ec7c1SStefano Zampini A->ops->multtransposeadd = MatMultAdd_MPISBAIJ; 1584eb1ec7c1SStefano Zampini } 1585eb1ec7c1SStefano Zampini #endif 1586eeffb40dSHong Zhang break; 158777e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 158843674050SBarry Smith MatCheckPreallocated(A,1); 1589eeffb40dSHong Zhang ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1590eeffb40dSHong Zhang break; 15919a4540c5SBarry Smith case MAT_SYMMETRY_ETERNAL: 1592e32f2f54SBarry Smith if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1593290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 159477e54ba9SKris Buschelman break; 1595d0d4cfc2SHong Zhang case MAT_IGNORE_LOWER_TRIANGULAR: 15964e0d8c25SBarry Smith aA->ignore_ltriangular = flg; 1597d0d4cfc2SHong Zhang break; 1598d0d4cfc2SHong Zhang case MAT_ERROR_LOWER_TRIANGULAR: 15994e0d8c25SBarry Smith aA->ignore_ltriangular = flg; 1600d0d4cfc2SHong Zhang break; 1601d0d4cfc2SHong Zhang case MAT_GETROW_UPPERTRIANGULAR: 16024e0d8c25SBarry Smith aA->getrow_utriangular = flg; 1603d0d4cfc2SHong Zhang break; 1604e98b92d7SKris Buschelman default: 1605e32f2f54SBarry Smith SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1606a30f8f8cSSatish Balay } 1607a30f8f8cSSatish Balay PetscFunctionReturn(0); 1608a30f8f8cSSatish Balay } 1609a30f8f8cSSatish Balay 1610fc4dec0aSBarry Smith PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1611a30f8f8cSSatish Balay { 1612dfbe8321SBarry Smith PetscErrorCode ierr; 16136e111a19SKarl Rupp 1614a30f8f8cSSatish Balay PetscFunctionBegin; 1615cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX) { 1616999d9058SBarry Smith ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1617cf37664fSBarry Smith } else if (reuse == MAT_REUSE_MATRIX) { 1618cf37664fSBarry Smith ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1619fc4dec0aSBarry Smith } 16208115998fSBarry Smith PetscFunctionReturn(0); 1621a30f8f8cSSatish Balay } 1622a30f8f8cSSatish Balay 1623dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1624a30f8f8cSSatish Balay { 1625a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1626a30f8f8cSSatish Balay Mat a = baij->A, b=baij->B; 1627dfbe8321SBarry Smith PetscErrorCode ierr; 16285e90f9d9SHong Zhang PetscInt nv,m,n; 1629ace3abfcSBarry Smith PetscBool flg; 1630a30f8f8cSSatish Balay 1631a30f8f8cSSatish Balay PetscFunctionBegin; 1632a30f8f8cSSatish Balay if (ll != rr) { 1633b3bf805bSHong Zhang ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1634e7e72b3dSBarry Smith if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1635a30f8f8cSSatish Balay } 1636b3bf805bSHong Zhang if (!ll) PetscFunctionReturn(0); 1637b3bf805bSHong Zhang 16385e90f9d9SHong Zhang ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1639e32f2f54SBarry Smith if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1640b3bf805bSHong Zhang 16415e90f9d9SHong Zhang ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1642e32f2f54SBarry Smith if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 16435e90f9d9SHong Zhang 1644ca9f406cSSatish Balay ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 16455e90f9d9SHong Zhang 16465e90f9d9SHong Zhang /* left diagonalscale the off-diagonal part */ 16470298fd71SBarry Smith ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 16485e90f9d9SHong Zhang 16495e90f9d9SHong Zhang /* scale the diagonal part */ 1650a30f8f8cSSatish Balay ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1651a30f8f8cSSatish Balay 16525e90f9d9SHong Zhang /* right diagonalscale the off-diagonal part */ 1653ca9f406cSSatish Balay ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 16540298fd71SBarry Smith ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1655a30f8f8cSSatish Balay PetscFunctionReturn(0); 1656a30f8f8cSSatish Balay } 1657a30f8f8cSSatish Balay 1658dfbe8321SBarry Smith PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1659a30f8f8cSSatish Balay { 1660f3566a2aSHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1661dfbe8321SBarry Smith PetscErrorCode ierr; 1662a30f8f8cSSatish Balay 1663a30f8f8cSSatish Balay PetscFunctionBegin; 1664a30f8f8cSSatish Balay ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1665a30f8f8cSSatish Balay PetscFunctionReturn(0); 1666a30f8f8cSSatish Balay } 1667a30f8f8cSSatish Balay 16686849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1669a30f8f8cSSatish Balay 1670ace3abfcSBarry Smith PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1671a30f8f8cSSatish Balay { 1672a30f8f8cSSatish Balay Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1673a30f8f8cSSatish Balay Mat a,b,c,d; 1674ace3abfcSBarry Smith PetscBool flg; 1675dfbe8321SBarry Smith PetscErrorCode ierr; 1676a30f8f8cSSatish Balay 1677a30f8f8cSSatish Balay PetscFunctionBegin; 1678a30f8f8cSSatish Balay a = matA->A; b = matA->B; 1679a30f8f8cSSatish Balay c = matB->A; d = matB->B; 1680a30f8f8cSSatish Balay 1681a30f8f8cSSatish Balay ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1682abc0a331SBarry Smith if (flg) { 1683a30f8f8cSSatish Balay ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1684a30f8f8cSSatish Balay } 1685b2566f29SBarry Smith ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1686a30f8f8cSSatish Balay PetscFunctionReturn(0); 1687a30f8f8cSSatish Balay } 1688a30f8f8cSSatish Balay 16893c896bc6SHong Zhang PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 16903c896bc6SHong Zhang { 16913c896bc6SHong Zhang PetscErrorCode ierr; 16924c7a3774SStefano Zampini PetscBool isbaij; 16933c896bc6SHong Zhang 16943c896bc6SHong Zhang PetscFunctionBegin; 16954c7a3774SStefano Zampini ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr); 16964c7a3774SStefano Zampini if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name); 16973c896bc6SHong Zhang /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 16983c896bc6SHong Zhang if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1699d0d4cfc2SHong Zhang ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 17003c896bc6SHong Zhang ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1701d0d4cfc2SHong Zhang ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 17023c896bc6SHong Zhang } else { 17034c7a3774SStefano Zampini Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 17044c7a3774SStefano Zampini Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 17054c7a3774SStefano Zampini 17063c896bc6SHong Zhang ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 17073c896bc6SHong Zhang ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 17083c896bc6SHong Zhang } 1709cdc753b6SBarry Smith ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 17103c896bc6SHong Zhang PetscFunctionReturn(0); 17113c896bc6SHong Zhang } 17123c896bc6SHong Zhang 17134994cf47SJed Brown PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1714273d9f13SBarry Smith { 1715dfbe8321SBarry Smith PetscErrorCode ierr; 1716273d9f13SBarry Smith 1717273d9f13SBarry Smith PetscFunctionBegin; 1718535b19f3SBarry Smith ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1719273d9f13SBarry Smith PetscFunctionReturn(0); 1720273d9f13SBarry Smith } 1721a5e6ed63SBarry Smith 17224fe895cdSHong Zhang PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 17234fe895cdSHong Zhang { 17244fe895cdSHong Zhang PetscErrorCode ierr; 17254fe895cdSHong Zhang Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 17264fe895cdSHong Zhang PetscBLASInt bnz,one=1; 17274fe895cdSHong Zhang Mat_SeqSBAIJ *xa,*ya; 17284fe895cdSHong Zhang Mat_SeqBAIJ *xb,*yb; 17294fe895cdSHong Zhang 17304fe895cdSHong Zhang PetscFunctionBegin; 17314fe895cdSHong Zhang if (str == SAME_NONZERO_PATTERN) { 17324fe895cdSHong Zhang PetscScalar alpha = a; 17334fe895cdSHong Zhang xa = (Mat_SeqSBAIJ*)xx->A->data; 17344fe895cdSHong Zhang ya = (Mat_SeqSBAIJ*)yy->A->data; 1735c5df96a5SBarry Smith ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 17368b83055fSJed Brown PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 17374fe895cdSHong Zhang xb = (Mat_SeqBAIJ*)xx->B->data; 17384fe895cdSHong Zhang yb = (Mat_SeqBAIJ*)yy->B->data; 1739c5df96a5SBarry Smith ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 17408b83055fSJed Brown PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1741a3fa217bSJose E. Roman ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1742ab784542SHong Zhang } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1743ab784542SHong Zhang ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1744ab784542SHong Zhang ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1745ab784542SHong Zhang ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 17464fe895cdSHong Zhang } else { 17474de5dceeSHong Zhang Mat B; 17484de5dceeSHong Zhang PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 17494de5dceeSHong Zhang if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1750d0d4cfc2SHong Zhang ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 17514de5dceeSHong Zhang ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 17524de5dceeSHong Zhang ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 17534de5dceeSHong Zhang ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 17544de5dceeSHong Zhang ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 17554de5dceeSHong Zhang ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 17564de5dceeSHong Zhang ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 17574de5dceeSHong Zhang ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 17584de5dceeSHong Zhang ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 17594de5dceeSHong Zhang ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 17604de5dceeSHong Zhang ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 17614de5dceeSHong Zhang ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 17624de5dceeSHong Zhang ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 176328be2f97SBarry Smith ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 17644de5dceeSHong Zhang ierr = PetscFree(nnz_d);CHKERRQ(ierr); 17654de5dceeSHong Zhang ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1766d0d4cfc2SHong Zhang ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 17674de5dceeSHong Zhang ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 17684fe895cdSHong Zhang } 17694fe895cdSHong Zhang PetscFunctionReturn(0); 17704fe895cdSHong Zhang } 17714fe895cdSHong Zhang 17727dae84e0SHong Zhang PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1773a5e6ed63SBarry Smith { 17746849ba73SBarry Smith PetscErrorCode ierr; 17751302d50aSBarry Smith PetscInt i; 1776afebec48SHong Zhang PetscBool flg; 1777a5e6ed63SBarry Smith 17786849ba73SBarry Smith PetscFunctionBegin; 17797dae84e0SHong Zhang ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */ 1780a5e6ed63SBarry Smith for (i=0; i<n; i++) { 1781a5e6ed63SBarry Smith ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1782afebec48SHong Zhang if (!flg) { 1783b2fa50c1SHong Zhang ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr); 1784a5e6ed63SBarry Smith } 17854dcd73b1SHong Zhang } 1786a5e6ed63SBarry Smith PetscFunctionReturn(0); 1787a5e6ed63SBarry Smith } 1788a5e6ed63SBarry Smith 17897d68702bSBarry Smith PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a) 17907d68702bSBarry Smith { 17917d68702bSBarry Smith PetscErrorCode ierr; 17927d68702bSBarry Smith Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data; 17936f33a894SBarry Smith Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data; 17947d68702bSBarry Smith 17957d68702bSBarry Smith PetscFunctionBegin; 17966f33a894SBarry Smith if (!Y->preallocated) { 17977d68702bSBarry Smith ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 17986f33a894SBarry Smith } else if (!aij->nz) { 1799b83222d8SBarry Smith PetscInt nonew = aij->nonew; 18006f33a894SBarry Smith ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 1801b83222d8SBarry Smith aij->nonew = nonew; 18027d68702bSBarry Smith } 18037d68702bSBarry Smith ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 18047d68702bSBarry Smith PetscFunctionReturn(0); 18057d68702bSBarry Smith } 18067d68702bSBarry Smith 18073b49f96aSBarry Smith PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d) 18083b49f96aSBarry Smith { 18093b49f96aSBarry Smith Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 18103b49f96aSBarry Smith PetscErrorCode ierr; 18113b49f96aSBarry Smith 18123b49f96aSBarry Smith PetscFunctionBegin; 18133b49f96aSBarry Smith if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 18143b49f96aSBarry Smith ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 18153b49f96aSBarry Smith if (d) { 18163b49f96aSBarry Smith PetscInt rstart; 18173b49f96aSBarry Smith ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 18183b49f96aSBarry Smith *d += rstart/A->rmap->bs; 18193b49f96aSBarry Smith 18203b49f96aSBarry Smith } 18213b49f96aSBarry Smith PetscFunctionReturn(0); 18223b49f96aSBarry Smith } 18233b49f96aSBarry Smith 1824a5b7ff6bSBarry Smith PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1825a5b7ff6bSBarry Smith { 1826a5b7ff6bSBarry Smith PetscFunctionBegin; 1827a5b7ff6bSBarry Smith *a = ((Mat_MPISBAIJ*)A->data)->A; 1828a5b7ff6bSBarry Smith PetscFunctionReturn(0); 1829a5b7ff6bSBarry Smith } 18303b49f96aSBarry Smith 1831a30f8f8cSSatish Balay /* -------------------------------------------------------------------*/ 18323964eb88SJed Brown static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1833a30f8f8cSSatish Balay MatGetRow_MPISBAIJ, 1834a30f8f8cSSatish Balay MatRestoreRow_MPISBAIJ, 1835a9d4b620SHong Zhang MatMult_MPISBAIJ, 183697304618SKris Buschelman /* 4*/ MatMultAdd_MPISBAIJ, 1837431c96f7SBarry Smith MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1838431c96f7SBarry Smith MatMultAdd_MPISBAIJ, 1839a30f8f8cSSatish Balay 0, 1840a30f8f8cSSatish Balay 0, 1841a30f8f8cSSatish Balay 0, 184297304618SKris Buschelman /* 10*/ 0, 1843a30f8f8cSSatish Balay 0, 1844a30f8f8cSSatish Balay 0, 184541f059aeSBarry Smith MatSOR_MPISBAIJ, 1846a30f8f8cSSatish Balay MatTranspose_MPISBAIJ, 184797304618SKris Buschelman /* 15*/ MatGetInfo_MPISBAIJ, 1848a30f8f8cSSatish Balay MatEqual_MPISBAIJ, 1849a30f8f8cSSatish Balay MatGetDiagonal_MPISBAIJ, 1850a30f8f8cSSatish Balay MatDiagonalScale_MPISBAIJ, 1851a30f8f8cSSatish Balay MatNorm_MPISBAIJ, 185297304618SKris Buschelman /* 20*/ MatAssemblyBegin_MPISBAIJ, 1853a30f8f8cSSatish Balay MatAssemblyEnd_MPISBAIJ, 1854a30f8f8cSSatish Balay MatSetOption_MPISBAIJ, 1855a30f8f8cSSatish Balay MatZeroEntries_MPISBAIJ, 1856d519adbfSMatthew Knepley /* 24*/ 0, 1857a30f8f8cSSatish Balay 0, 1858a30f8f8cSSatish Balay 0, 1859a30f8f8cSSatish Balay 0, 1860a30f8f8cSSatish Balay 0, 18614994cf47SJed Brown /* 29*/ MatSetUp_MPISBAIJ, 1862b5df2d14SHong Zhang 0, 1863a30f8f8cSSatish Balay 0, 1864a5b7ff6bSBarry Smith MatGetDiagonalBlock_MPISBAIJ, 1865a30f8f8cSSatish Balay 0, 1866d519adbfSMatthew Knepley /* 34*/ MatDuplicate_MPISBAIJ, 1867a30f8f8cSSatish Balay 0, 1868a30f8f8cSSatish Balay 0, 1869a30f8f8cSSatish Balay 0, 1870a30f8f8cSSatish Balay 0, 1871d519adbfSMatthew Knepley /* 39*/ MatAXPY_MPISBAIJ, 18727dae84e0SHong Zhang MatCreateSubMatrices_MPISBAIJ, 1873d94109b8SHong Zhang MatIncreaseOverlap_MPISBAIJ, 1874a30f8f8cSSatish Balay MatGetValues_MPISBAIJ, 18753c896bc6SHong Zhang MatCopy_MPISBAIJ, 1876d519adbfSMatthew Knepley /* 44*/ 0, 1877a30f8f8cSSatish Balay MatScale_MPISBAIJ, 18787d68702bSBarry Smith MatShift_MPISBAIJ, 1879a30f8f8cSSatish Balay 0, 1880a30f8f8cSSatish Balay 0, 1881f73d5cc4SBarry Smith /* 49*/ 0, 1882a30f8f8cSSatish Balay 0, 1883a30f8f8cSSatish Balay 0, 1884a30f8f8cSSatish Balay 0, 1885a30f8f8cSSatish Balay 0, 1886d519adbfSMatthew Knepley /* 54*/ 0, 1887a30f8f8cSSatish Balay 0, 1888a30f8f8cSSatish Balay MatSetUnfactored_MPISBAIJ, 1889a30f8f8cSSatish Balay 0, 1890a30f8f8cSSatish Balay MatSetValuesBlocked_MPISBAIJ, 18917dae84e0SHong Zhang /* 59*/ MatCreateSubMatrix_MPISBAIJ, 1892a30f8f8cSSatish Balay 0, 1893a30f8f8cSSatish Balay 0, 1894357abbc8SBarry Smith 0, 189524d5174aSHong Zhang 0, 1896d519adbfSMatthew Knepley /* 64*/ 0, 189724d5174aSHong Zhang 0, 189824d5174aSHong Zhang 0, 189924d5174aSHong Zhang 0, 190024d5174aSHong Zhang 0, 1901d519adbfSMatthew Knepley /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 190224d5174aSHong Zhang 0, 190328d58a37SPierre Jolivet MatConvert_MPISBAIJ_Basic, 190497304618SKris Buschelman 0, 190597304618SKris Buschelman 0, 1906d519adbfSMatthew Knepley /* 74*/ 0, 190797304618SKris Buschelman 0, 190897304618SKris Buschelman 0, 190997304618SKris Buschelman 0, 191097304618SKris Buschelman 0, 1911d519adbfSMatthew Knepley /* 79*/ 0, 191297304618SKris Buschelman 0, 191397304618SKris Buschelman 0, 191497304618SKris Buschelman 0, 19155bba2384SShri Abhyankar MatLoad_MPISBAIJ, 1916d519adbfSMatthew Knepley /* 84*/ 0, 1917865e5f61SKris Buschelman 0, 1918865e5f61SKris Buschelman 0, 1919865e5f61SKris Buschelman 0, 1920865e5f61SKris Buschelman 0, 1921d519adbfSMatthew Knepley /* 89*/ 0, 1922865e5f61SKris Buschelman 0, 1923865e5f61SKris Buschelman 0, 1924865e5f61SKris Buschelman 0, 1925865e5f61SKris Buschelman 0, 1926d519adbfSMatthew Knepley /* 94*/ 0, 1927865e5f61SKris Buschelman 0, 1928865e5f61SKris Buschelman 0, 192999cafbc1SBarry Smith 0, 193099cafbc1SBarry Smith 0, 1931d519adbfSMatthew Knepley /* 99*/ 0, 193299cafbc1SBarry Smith 0, 193399cafbc1SBarry Smith 0, 193499cafbc1SBarry Smith 0, 193599cafbc1SBarry Smith 0, 1936d519adbfSMatthew Knepley /*104*/ 0, 193799cafbc1SBarry Smith MatRealPart_MPISBAIJ, 1938d0d4cfc2SHong Zhang MatImaginaryPart_MPISBAIJ, 1939d0d4cfc2SHong Zhang MatGetRowUpperTriangular_MPISBAIJ, 194095936485SShri Abhyankar MatRestoreRowUpperTriangular_MPISBAIJ, 194195936485SShri Abhyankar /*109*/ 0, 194295936485SShri Abhyankar 0, 194395936485SShri Abhyankar 0, 194495936485SShri Abhyankar 0, 19453b49f96aSBarry Smith MatMissingDiagonal_MPISBAIJ, 194695936485SShri Abhyankar /*114*/ 0, 194795936485SShri Abhyankar 0, 194895936485SShri Abhyankar 0, 194995936485SShri Abhyankar 0, 195095936485SShri Abhyankar 0, 195195936485SShri Abhyankar /*119*/ 0, 195295936485SShri Abhyankar 0, 195395936485SShri Abhyankar 0, 19543964eb88SJed Brown 0, 19553964eb88SJed Brown 0, 19563964eb88SJed Brown /*124*/ 0, 19573964eb88SJed Brown 0, 19583964eb88SJed Brown 0, 19593964eb88SJed Brown 0, 19603964eb88SJed Brown 0, 19613964eb88SJed Brown /*129*/ 0, 19623964eb88SJed Brown 0, 19633964eb88SJed Brown 0, 19643964eb88SJed Brown 0, 19653964eb88SJed Brown 0, 19663964eb88SJed Brown /*134*/ 0, 19673964eb88SJed Brown 0, 19683964eb88SJed Brown 0, 19693964eb88SJed Brown 0, 19703964eb88SJed Brown 0, 197146533700Sstefano_zampini /*139*/ MatSetBlockSizes_Default, 1972f9426fe0SMark Adams 0, 197359f5e6ceSHong Zhang 0, 197459f5e6ceSHong Zhang 0, 197559f5e6ceSHong Zhang 0, 197659f5e6ceSHong Zhang /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 197799cafbc1SBarry Smith }; 1978a30f8f8cSSatish Balay 1979b2573a8aSBarry Smith PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 1980a23d5eceSKris Buschelman { 1981476417e5SBarry Smith Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1982dfbe8321SBarry Smith PetscErrorCode ierr; 1983535b19f3SBarry Smith PetscInt i,mbs,Mbs; 19845d2a9ed1SStefano Zampini PetscMPIInt size; 1985a23d5eceSKris Buschelman 1986a23d5eceSKris Buschelman PetscFunctionBegin; 198733d57670SJed Brown ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 198826283091SBarry Smith ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 198926283091SBarry Smith ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1990e02043d6SBarry Smith ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1991476417e5SBarry Smith if (B->rmap->N > B->cmap->N) SETERRQ2(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %D than columns %D",B->rmap->N,B->cmap->N); 1992476417e5SBarry Smith if (B->rmap->n > B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %D than columns %D",B->rmap->n,B->cmap->n); 1993899cda47SBarry Smith 1994d0f46423SBarry Smith mbs = B->rmap->n/bs; 1995d0f46423SBarry Smith Mbs = B->rmap->N/bs; 1996c2fc9fa9SBarry Smith if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1997a23d5eceSKris Buschelman 1998d0f46423SBarry Smith B->rmap->bs = bs; 1999a23d5eceSKris Buschelman b->bs2 = bs*bs; 2000a23d5eceSKris Buschelman b->mbs = mbs; 2001a23d5eceSKris Buschelman b->Mbs = Mbs; 2002de64b629SHong Zhang b->nbs = B->cmap->n/bs; 2003de64b629SHong Zhang b->Nbs = B->cmap->N/bs; 2004a23d5eceSKris Buschelman 2005a23d5eceSKris Buschelman for (i=0; i<=b->size; i++) { 2006d0f46423SBarry Smith b->rangebs[i] = B->rmap->range[i]/bs; 2007a23d5eceSKris Buschelman } 2008d0f46423SBarry Smith b->rstartbs = B->rmap->rstart/bs; 2009d0f46423SBarry Smith b->rendbs = B->rmap->rend/bs; 2010a23d5eceSKris Buschelman 2011d0f46423SBarry Smith b->cstartbs = B->cmap->rstart/bs; 2012d0f46423SBarry Smith b->cendbs = B->cmap->rend/bs; 2013a23d5eceSKris Buschelman 2014cb7b82ddSBarry Smith #if defined(PETSC_USE_CTABLE) 2015cb7b82ddSBarry Smith ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2016cb7b82ddSBarry Smith #else 2017cb7b82ddSBarry Smith ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2018cb7b82ddSBarry Smith #endif 2019cb7b82ddSBarry Smith ierr = PetscFree(b->garray);CHKERRQ(ierr); 2020cb7b82ddSBarry Smith ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2021cb7b82ddSBarry Smith ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2022cb7b82ddSBarry Smith ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr); 2023cb7b82ddSBarry Smith ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr); 2024cb7b82ddSBarry Smith ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr); 2025cb7b82ddSBarry Smith ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr); 2026cb7b82ddSBarry Smith ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr); 2027cb7b82ddSBarry Smith ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr); 2028cb7b82ddSBarry Smith 2029cb7b82ddSBarry Smith /* Because the B will have been resized we simply destroy it and create a new one each time */ 20305d2a9ed1SStefano Zampini ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2031cb7b82ddSBarry Smith ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2032cb7b82ddSBarry Smith ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 20335d2a9ed1SStefano Zampini ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr); 2034cb7b82ddSBarry Smith ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2035cb7b82ddSBarry Smith ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2036cb7b82ddSBarry Smith 2037526dfc15SBarry Smith if (!B->preallocated) { 2038f69a0ea3SMatthew Knepley ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2039d0f46423SBarry Smith ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 20409c097c71SKris Buschelman ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 20413bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2042ce94432eSBarry Smith ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2043526dfc15SBarry Smith } 2044a23d5eceSKris Buschelman 2045526dfc15SBarry Smith ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2046526dfc15SBarry Smith ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 204726fbe8dcSKarl Rupp 2048526dfc15SBarry Smith B->preallocated = PETSC_TRUE; 2049cb7b82ddSBarry Smith B->was_assembled = PETSC_FALSE; 2050cb7b82ddSBarry Smith B->assembled = PETSC_FALSE; 2051a23d5eceSKris Buschelman PetscFunctionReturn(0); 2052a23d5eceSKris Buschelman } 2053a23d5eceSKris Buschelman 2054dfb205c3SBarry Smith PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2055dfb205c3SBarry Smith { 205602106b30SBarry Smith PetscInt m,rstart,cend; 20570cd7f59aSBarry Smith PetscInt i,j,d,nz,bd, nz_max=0,*d_nnz=0,*o_nnz=0; 2058dfb205c3SBarry Smith const PetscInt *JJ =0; 2059dfb205c3SBarry Smith PetscScalar *values=0; 2060bb80cfbbSStefano Zampini PetscBool roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented; 2061dfb205c3SBarry Smith PetscErrorCode ierr; 20623bd0feecSPierre Jolivet PetscBool nooffprocentries; 2063dfb205c3SBarry Smith 2064dfb205c3SBarry Smith PetscFunctionBegin; 2065ce94432eSBarry Smith if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2066dfb205c3SBarry Smith ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2067dfb205c3SBarry Smith ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2068dfb205c3SBarry Smith ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2069dfb205c3SBarry Smith ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2070e02043d6SBarry Smith ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2071dfb205c3SBarry Smith m = B->rmap->n/bs; 2072dfb205c3SBarry Smith rstart = B->rmap->rstart/bs; 2073dfb205c3SBarry Smith cend = B->cmap->rend/bs; 2074dfb205c3SBarry Smith 2075dfb205c3SBarry Smith if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2076dcca6d9dSJed Brown ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2077dfb205c3SBarry Smith for (i=0; i<m; i++) { 2078dfb205c3SBarry Smith nz = ii[i+1] - ii[i]; 2079dfb205c3SBarry Smith if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 20800cd7f59aSBarry Smith /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */ 2081dfb205c3SBarry Smith JJ = jj + ii[i]; 20820cd7f59aSBarry Smith bd = 0; 2083dfb205c3SBarry Smith for (j=0; j<nz; j++) { 20840cd7f59aSBarry Smith if (*JJ >= i + rstart) break; 2085dfb205c3SBarry Smith JJ++; 20860cd7f59aSBarry Smith bd++; 2087dfb205c3SBarry Smith } 2088dfb205c3SBarry Smith d = 0; 2089dfb205c3SBarry Smith for (; j<nz; j++) { 2090dfb205c3SBarry Smith if (*JJ++ >= cend) break; 2091dfb205c3SBarry Smith d++; 2092dfb205c3SBarry Smith } 2093dfb205c3SBarry Smith d_nnz[i] = d; 20940cd7f59aSBarry Smith o_nnz[i] = nz - d - bd; 20950cd7f59aSBarry Smith nz = nz - bd; 20960cd7f59aSBarry Smith nz_max = PetscMax(nz_max,nz); 2097dfb205c3SBarry Smith } 2098dfb205c3SBarry Smith ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2099bb80cfbbSStefano Zampini ierr = MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2100dfb205c3SBarry Smith ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2101dfb205c3SBarry Smith 2102dfb205c3SBarry Smith values = (PetscScalar*)V; 2103dfb205c3SBarry Smith if (!values) { 2104580bdb30SBarry Smith ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2105dfb205c3SBarry Smith } 2106dfb205c3SBarry Smith for (i=0; i<m; i++) { 2107dfb205c3SBarry Smith PetscInt row = i + rstart; 2108dfb205c3SBarry Smith PetscInt ncols = ii[i+1] - ii[i]; 2109dfb205c3SBarry Smith const PetscInt *icols = jj + ii[i]; 2110bb80cfbbSStefano Zampini if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2111dfb205c3SBarry Smith const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2112dfb205c3SBarry Smith ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2113bb80cfbbSStefano Zampini } else { /* block ordering does not match so we can only insert one block at a time. */ 2114bb80cfbbSStefano Zampini PetscInt j; 21150cd7f59aSBarry Smith for (j=0; j<ncols; j++) { 21160cd7f59aSBarry Smith const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 21170cd7f59aSBarry Smith ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 21180cd7f59aSBarry Smith } 21190cd7f59aSBarry Smith } 2120dfb205c3SBarry Smith } 2121dfb205c3SBarry Smith 2122dfb205c3SBarry Smith if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 21233bd0feecSPierre Jolivet nooffprocentries = B->nooffprocentries; 21243bd0feecSPierre Jolivet B->nooffprocentries = PETSC_TRUE; 2125dfb205c3SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2126dfb205c3SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 21273bd0feecSPierre Jolivet B->nooffprocentries = nooffprocentries; 21283bd0feecSPierre Jolivet 21297827cd58SJed Brown ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2130dfb205c3SBarry Smith PetscFunctionReturn(0); 2131dfb205c3SBarry Smith } 2132dfb205c3SBarry Smith 21330bad9183SKris Buschelman /*MC 2134fafad747SKris Buschelman MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2135828413b8SBarry Smith based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2136828413b8SBarry Smith the matrix is stored. 2137828413b8SBarry Smith 2138828413b8SBarry Smith For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2139828413b8SBarry Smith can call MatSetOption(Mat, MAT_HERMITIAN); 21400bad9183SKris Buschelman 21410bad9183SKris Buschelman Options Database Keys: 21420bad9183SKris Buschelman . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 21430bad9183SKris Buschelman 2144476417e5SBarry Smith Notes: 2145476417e5SBarry Smith The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the 2146476417e5SBarry Smith diagonal portion of the matrix of each process has to less than or equal the number of columns. 2147476417e5SBarry Smith 21480bad9183SKris Buschelman Level: beginner 21490bad9183SKris Buschelman 2150fd292e60Sprj- .seealso: MatCreateBAIJ(), MATSEQSBAIJ, MatType 21510bad9183SKris Buschelman M*/ 21520bad9183SKris Buschelman 21538cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2154b5df2d14SHong Zhang { 2155b5df2d14SHong Zhang Mat_MPISBAIJ *b; 2156dfbe8321SBarry Smith PetscErrorCode ierr; 215794ae4db5SBarry Smith PetscBool flg = PETSC_FALSE; 2158b5df2d14SHong Zhang 2159b5df2d14SHong Zhang PetscFunctionBegin; 2160b00a9115SJed Brown ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2161b0a32e0cSBarry Smith B->data = (void*)b; 2162b5df2d14SHong Zhang ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2163b5df2d14SHong Zhang 2164b5df2d14SHong Zhang B->ops->destroy = MatDestroy_MPISBAIJ; 2165b5df2d14SHong Zhang B->ops->view = MatView_MPISBAIJ; 2166b5df2d14SHong Zhang B->assembled = PETSC_FALSE; 2167b5df2d14SHong Zhang B->insertmode = NOT_SET_VALUES; 216826fbe8dcSKarl Rupp 2169ce94432eSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2170ce94432eSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2171b5df2d14SHong Zhang 2172b5df2d14SHong Zhang /* build local table of row and column ownerships */ 2173854ce69bSBarry Smith ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 2174b5df2d14SHong Zhang 2175b5df2d14SHong Zhang /* build cache for off array entries formed */ 2176ce94432eSBarry Smith ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 217726fbe8dcSKarl Rupp 2178b5df2d14SHong Zhang b->donotstash = PETSC_FALSE; 21790298fd71SBarry Smith b->colmap = NULL; 21800298fd71SBarry Smith b->garray = NULL; 2181b5df2d14SHong Zhang b->roworiented = PETSC_TRUE; 2182b5df2d14SHong Zhang 2183b5df2d14SHong Zhang /* stuff used in block assembly */ 2184b5df2d14SHong Zhang b->barray = 0; 2185b5df2d14SHong Zhang 2186b5df2d14SHong Zhang /* stuff used for matrix vector multiply */ 2187b5df2d14SHong Zhang b->lvec = 0; 2188b5df2d14SHong Zhang b->Mvctx = 0; 218940781036SHong Zhang b->slvec0 = 0; 219040781036SHong Zhang b->slvec0b = 0; 219140781036SHong Zhang b->slvec1 = 0; 219240781036SHong Zhang b->slvec1a = 0; 219340781036SHong Zhang b->slvec1b = 0; 219440781036SHong Zhang b->sMvctx = 0; 2195b5df2d14SHong Zhang 2196b5df2d14SHong Zhang /* stuff for MatGetRow() */ 2197b5df2d14SHong Zhang b->rowindices = 0; 2198b5df2d14SHong Zhang b->rowvalues = 0; 2199b5df2d14SHong Zhang b->getrowactive = PETSC_FALSE; 2200b5df2d14SHong Zhang 2201b5df2d14SHong Zhang /* hash table stuff */ 2202b5df2d14SHong Zhang b->ht = 0; 2203b5df2d14SHong Zhang b->hd = 0; 2204b5df2d14SHong Zhang b->ht_size = 0; 2205b5df2d14SHong Zhang b->ht_flag = PETSC_FALSE; 2206b5df2d14SHong Zhang b->ht_fact = 0; 2207b5df2d14SHong Zhang b->ht_total_ct = 0; 2208b5df2d14SHong Zhang b->ht_insert_ct = 0; 2209b5df2d14SHong Zhang 22107dae84e0SHong Zhang /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 22117a868f3eSHong Zhang b->ijonly = PETSC_FALSE; 22127a868f3eSHong Zhang 221359ffdab8SBarry Smith b->in_loc = 0; 221459ffdab8SBarry Smith b->v_loc = 0; 221559ffdab8SBarry Smith b->n_loc = 0; 221694ae4db5SBarry Smith 2217bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 2218bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 2219bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 2220bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 22216214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 22226214f412SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 22236214f412SHong Zhang #endif 222428d58a37SPierre Jolivet ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr); 222528d58a37SPierre Jolivet ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr); 2226aa5a9175SDahai Guo 222723ce1328SBarry Smith B->symmetric = PETSC_TRUE; 222823ce1328SBarry Smith B->structurally_symmetric = PETSC_TRUE; 222923ce1328SBarry Smith B->symmetric_set = PETSC_TRUE; 223023ce1328SBarry Smith B->structurally_symmetric_set = PETSC_TRUE; 22319899f194SHong Zhang B->symmetric_eternal = PETSC_TRUE; 2232eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX) 223313647f61SHong Zhang B->hermitian = PETSC_FALSE; 223413647f61SHong Zhang B->hermitian_set = PETSC_FALSE; 2235eb1ec7c1SStefano Zampini #else 2236eb1ec7c1SStefano Zampini B->hermitian = PETSC_TRUE; 2237eb1ec7c1SStefano Zampini B->hermitian_set = PETSC_TRUE; 2238eb1ec7c1SStefano Zampini #endif 223913647f61SHong Zhang 224017667f90SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 224194ae4db5SBarry Smith ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 224294ae4db5SBarry Smith ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 224394ae4db5SBarry Smith if (flg) { 224494ae4db5SBarry Smith PetscReal fact = 1.39; 224594ae4db5SBarry Smith ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 224694ae4db5SBarry Smith ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 224794ae4db5SBarry Smith if (fact <= 1.0) fact = 1.39; 224894ae4db5SBarry Smith ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 224994ae4db5SBarry Smith ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 225094ae4db5SBarry Smith } 225194ae4db5SBarry Smith ierr = PetscOptionsEnd();CHKERRQ(ierr); 2252b5df2d14SHong Zhang PetscFunctionReturn(0); 2253b5df2d14SHong Zhang } 2254b5df2d14SHong Zhang 2255209238afSKris Buschelman /*MC 2256002d173eSKris Buschelman MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2257209238afSKris Buschelman 2258209238afSKris Buschelman This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 2259209238afSKris Buschelman and MATMPISBAIJ otherwise. 2260209238afSKris Buschelman 2261209238afSKris Buschelman Options Database Keys: 2262209238afSKris Buschelman . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2263209238afSKris Buschelman 2264209238afSKris Buschelman Level: beginner 2265209238afSKris Buschelman 2266209238afSKris Buschelman .seealso: MatCreateMPISBAIJ, MATSEQSBAIJ, MATMPISBAIJ 2267209238afSKris Buschelman M*/ 2268209238afSKris Buschelman 2269b5df2d14SHong Zhang /*@C 2270b5df2d14SHong Zhang MatMPISBAIJSetPreallocation - For good matrix assembly performance 2271b5df2d14SHong Zhang the user should preallocate the matrix storage by setting the parameters 2272b5df2d14SHong Zhang d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2273b5df2d14SHong Zhang performance can be increased by more than a factor of 50. 2274b5df2d14SHong Zhang 2275b5df2d14SHong Zhang Collective on Mat 2276b5df2d14SHong Zhang 2277b5df2d14SHong Zhang Input Parameters: 22781c4f3114SJed Brown + B - the matrix 2279bb7ae925SBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2280bb7ae925SBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2281b5df2d14SHong Zhang . d_nz - number of block nonzeros per block row in diagonal portion of local 2282b5df2d14SHong Zhang submatrix (same for all local rows) 2283b5df2d14SHong Zhang . d_nnz - array containing the number of block nonzeros in the various block rows 22846d10fdaeSSatish Balay in the upper triangular and diagonal part of the in diagonal portion of the local 22850298fd71SBarry Smith (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 228695742e49SBarry Smith for the diagonal entry and set a value even if it is zero. 2287b5df2d14SHong Zhang . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2288b5df2d14SHong Zhang submatrix (same for all local rows). 2289b5df2d14SHong Zhang - o_nnz - array containing the number of nonzeros in the various block rows of the 2290c2fc9fa9SBarry Smith off-diagonal portion of the local submatrix that is right of the diagonal 22910298fd71SBarry Smith (possibly different for each block row) or NULL. 2292b5df2d14SHong Zhang 2293b5df2d14SHong Zhang 2294b5df2d14SHong Zhang Options Database Keys: 2295a2b725a8SWilliam Gropp + -mat_no_unroll - uses code that does not unroll the loops in the 2296b5df2d14SHong Zhang block calculations (much slower) 2297a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use 2298b5df2d14SHong Zhang 2299b5df2d14SHong Zhang Notes: 2300b5df2d14SHong Zhang 2301b5df2d14SHong Zhang If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2302b5df2d14SHong Zhang than it must be used on all processors that share the object for that argument. 2303b5df2d14SHong Zhang 230449a6f317SBarry Smith If the *_nnz parameter is given then the *_nz parameter is ignored 230549a6f317SBarry Smith 2306b5df2d14SHong Zhang Storage Information: 2307b5df2d14SHong Zhang For a square global matrix we define each processor's diagonal portion 2308b5df2d14SHong Zhang to be its local rows and the corresponding columns (a square submatrix); 2309b5df2d14SHong Zhang each processor's off-diagonal portion encompasses the remainder of the 2310b5df2d14SHong Zhang local matrix (a rectangular submatrix). 2311b5df2d14SHong Zhang 2312b5df2d14SHong Zhang The user can specify preallocated storage for the diagonal part of 2313b5df2d14SHong Zhang the local submatrix with either d_nz or d_nnz (not both). Set 23140298fd71SBarry Smith d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2315b5df2d14SHong Zhang memory allocation. Likewise, specify preallocated storage for the 2316b5df2d14SHong Zhang off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2317b5df2d14SHong Zhang 2318aa95bbe8SBarry Smith You can call MatGetInfo() to get information on how effective the preallocation was; 2319aa95bbe8SBarry Smith for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2320aa95bbe8SBarry Smith You can also run with the option -info and look for messages with the string 2321aa95bbe8SBarry Smith malloc in them to see if additional memory allocation was needed. 2322aa95bbe8SBarry Smith 2323b5df2d14SHong Zhang Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2324b5df2d14SHong Zhang the figure below we depict these three local rows and all columns (0-11). 2325b5df2d14SHong Zhang 2326b5df2d14SHong Zhang .vb 2327b5df2d14SHong Zhang 0 1 2 3 4 5 6 7 8 9 10 11 2328a4b1a0f6SJed Brown -------------------------- 2329c2fc9fa9SBarry Smith row 3 |. . . d d d o o o o o o 2330c2fc9fa9SBarry Smith row 4 |. . . d d d o o o o o o 2331c2fc9fa9SBarry Smith row 5 |. . . d d d o o o o o o 2332a4b1a0f6SJed Brown -------------------------- 2333b5df2d14SHong Zhang .ve 2334b5df2d14SHong Zhang 2335b5df2d14SHong Zhang Thus, any entries in the d locations are stored in the d (diagonal) 2336b5df2d14SHong Zhang submatrix, and any entries in the o locations are stored in the 23376d10fdaeSSatish Balay o (off-diagonal) submatrix. Note that the d matrix is stored in 23386d10fdaeSSatish Balay MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2339b5df2d14SHong Zhang 23406d10fdaeSSatish Balay Now d_nz should indicate the number of block nonzeros per row in the upper triangular 23416d10fdaeSSatish Balay plus the diagonal part of the d matrix, 2342c2fc9fa9SBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix 2343c2fc9fa9SBarry Smith 2344b5df2d14SHong Zhang In general, for PDE problems in which most nonzeros are near the diagonal, 2345b5df2d14SHong Zhang one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2346b5df2d14SHong Zhang or you will get TERRIBLE performance; see the users' manual chapter on 2347b5df2d14SHong Zhang matrices. 2348b5df2d14SHong Zhang 2349b5df2d14SHong Zhang Level: intermediate 2350b5df2d14SHong Zhang 2351ab978733SBarry Smith .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 2352b5df2d14SHong Zhang @*/ 23537087cfbeSBarry Smith PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2354b5df2d14SHong Zhang { 23554ac538c5SBarry Smith PetscErrorCode ierr; 2356b5df2d14SHong Zhang 2357b5df2d14SHong Zhang PetscFunctionBegin; 23586ba663aaSJed Brown PetscValidHeaderSpecific(B,MAT_CLASSID,1); 23596ba663aaSJed Brown PetscValidType(B,1); 23606ba663aaSJed Brown PetscValidLogicalCollectiveInt(B,bs,2); 23614ac538c5SBarry Smith ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 2362b5df2d14SHong Zhang PetscFunctionReturn(0); 2363b5df2d14SHong Zhang } 2364b5df2d14SHong Zhang 2365a30f8f8cSSatish Balay /*@C 236669b1f4b7SBarry Smith MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2367a30f8f8cSSatish Balay (block compressed row). For good matrix assembly performance 2368a30f8f8cSSatish Balay the user should preallocate the matrix storage by setting the parameters 2369a30f8f8cSSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2370a30f8f8cSSatish Balay performance can be increased by more than a factor of 50. 2371a30f8f8cSSatish Balay 2372d083f849SBarry Smith Collective 2373a30f8f8cSSatish Balay 2374a30f8f8cSSatish Balay Input Parameters: 2375a30f8f8cSSatish Balay + comm - MPI communicator 2376bb7ae925SBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2377bb7ae925SBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2378a30f8f8cSSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2379a30f8f8cSSatish Balay This value should be the same as the local size used in creating the 2380a30f8f8cSSatish Balay y vector for the matrix-vector product y = Ax. 2381a30f8f8cSSatish Balay . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2382a30f8f8cSSatish Balay This value should be the same as the local size used in creating the 2383a30f8f8cSSatish Balay x vector for the matrix-vector product y = Ax. 2384a30f8f8cSSatish Balay . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2385a30f8f8cSSatish Balay . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2386a30f8f8cSSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 2387a30f8f8cSSatish Balay submatrix (same for all local rows) 2388a30f8f8cSSatish Balay . d_nnz - array containing the number of block nonzeros in the various block rows 23896d10fdaeSSatish Balay in the upper triangular portion of the in diagonal portion of the local 23900298fd71SBarry Smith (possibly different for each block block row) or NULL. 239195742e49SBarry Smith If you plan to factor the matrix you must leave room for the diagonal entry and 239295742e49SBarry Smith set its value even if it is zero. 2393a30f8f8cSSatish Balay . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2394a30f8f8cSSatish Balay submatrix (same for all local rows). 2395a30f8f8cSSatish Balay - o_nnz - array containing the number of nonzeros in the various block rows of the 2396a30f8f8cSSatish Balay off-diagonal portion of the local submatrix (possibly different for 23970298fd71SBarry Smith each block row) or NULL. 2398a30f8f8cSSatish Balay 2399a30f8f8cSSatish Balay Output Parameter: 2400a30f8f8cSSatish Balay . A - the matrix 2401a30f8f8cSSatish Balay 2402a30f8f8cSSatish Balay Options Database Keys: 2403a2b725a8SWilliam Gropp + -mat_no_unroll - uses code that does not unroll the loops in the 2404a30f8f8cSSatish Balay block calculations (much slower) 2405a30f8f8cSSatish Balay . -mat_block_size - size of the blocks to use 2406a2b725a8SWilliam Gropp - -mat_mpi - use the parallel matrix data structures even on one processor 2407a30f8f8cSSatish Balay (defaults to using SeqBAIJ format on one processor) 2408a30f8f8cSSatish Balay 2409175b88e8SBarry Smith It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2410f6f02116SRichard Tran Mills MatXXXXSetPreallocation() paradigm instead of this routine directly. 2411175b88e8SBarry Smith [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2412175b88e8SBarry Smith 2413a30f8f8cSSatish Balay Notes: 2414d1be2dadSMatthew Knepley The number of rows and columns must be divisible by blocksize. 24156d6d819aSHong Zhang This matrix type does not support complex Hermitian operation. 2416d1be2dadSMatthew Knepley 2417a30f8f8cSSatish Balay The user MUST specify either the local or global matrix dimensions 2418a30f8f8cSSatish Balay (possibly both). 2419a30f8f8cSSatish Balay 2420a30f8f8cSSatish Balay If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2421a30f8f8cSSatish Balay than it must be used on all processors that share the object for that argument. 2422a30f8f8cSSatish Balay 242349a6f317SBarry Smith If the *_nnz parameter is given then the *_nz parameter is ignored 242449a6f317SBarry Smith 2425a30f8f8cSSatish Balay Storage Information: 2426a30f8f8cSSatish Balay For a square global matrix we define each processor's diagonal portion 2427a30f8f8cSSatish Balay to be its local rows and the corresponding columns (a square submatrix); 2428a30f8f8cSSatish Balay each processor's off-diagonal portion encompasses the remainder of the 2429a30f8f8cSSatish Balay local matrix (a rectangular submatrix). 2430a30f8f8cSSatish Balay 2431a30f8f8cSSatish Balay The user can specify preallocated storage for the diagonal part of 2432a30f8f8cSSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 24330298fd71SBarry Smith d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2434a30f8f8cSSatish Balay memory allocation. Likewise, specify preallocated storage for the 2435a30f8f8cSSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2436a30f8f8cSSatish Balay 2437a30f8f8cSSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2438a30f8f8cSSatish Balay the figure below we depict these three local rows and all columns (0-11). 2439a30f8f8cSSatish Balay 2440a30f8f8cSSatish Balay .vb 2441a30f8f8cSSatish Balay 0 1 2 3 4 5 6 7 8 9 10 11 2442a4b1a0f6SJed Brown -------------------------- 2443c2fc9fa9SBarry Smith row 3 |. . . d d d o o o o o o 2444c2fc9fa9SBarry Smith row 4 |. . . d d d o o o o o o 2445c2fc9fa9SBarry Smith row 5 |. . . d d d o o o o o o 2446a4b1a0f6SJed Brown -------------------------- 2447a30f8f8cSSatish Balay .ve 2448a30f8f8cSSatish Balay 2449a30f8f8cSSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 2450a30f8f8cSSatish Balay submatrix, and any entries in the o locations are stored in the 24516d10fdaeSSatish Balay o (off-diagonal) submatrix. Note that the d matrix is stored in 24526d10fdaeSSatish Balay MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2453a30f8f8cSSatish Balay 24546d10fdaeSSatish Balay Now d_nz should indicate the number of block nonzeros per row in the upper triangular 24556d10fdaeSSatish Balay plus the diagonal part of the d matrix, 2456a30f8f8cSSatish Balay and o_nz should indicate the number of block nonzeros per row in the o matrix. 2457a30f8f8cSSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 2458a30f8f8cSSatish Balay one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2459a30f8f8cSSatish Balay or you will get TERRIBLE performance; see the users' manual chapter on 2460a30f8f8cSSatish Balay matrices. 2461a30f8f8cSSatish Balay 2462a30f8f8cSSatish Balay Level: intermediate 2463a30f8f8cSSatish Balay 246469b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2465a30f8f8cSSatish Balay @*/ 2466a30f8f8cSSatish Balay 246769b1f4b7SBarry Smith PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2468a30f8f8cSSatish Balay { 24696849ba73SBarry Smith PetscErrorCode ierr; 24701302d50aSBarry Smith PetscMPIInt size; 2471a30f8f8cSSatish Balay 2472a30f8f8cSSatish Balay PetscFunctionBegin; 2473f69a0ea3SMatthew Knepley ierr = MatCreate(comm,A);CHKERRQ(ierr); 2474f69a0ea3SMatthew Knepley ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2475273d9f13SBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2476273d9f13SBarry Smith if (size > 1) { 2477b5df2d14SHong Zhang ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2478b5df2d14SHong Zhang ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2479273d9f13SBarry Smith } else { 2480273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2481273d9f13SBarry Smith ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2482273d9f13SBarry Smith } 2483a30f8f8cSSatish Balay PetscFunctionReturn(0); 2484a30f8f8cSSatish Balay } 2485a30f8f8cSSatish Balay 2486a30f8f8cSSatish Balay 24876849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2488a30f8f8cSSatish Balay { 2489a30f8f8cSSatish Balay Mat mat; 2490a30f8f8cSSatish Balay Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2491dfbe8321SBarry Smith PetscErrorCode ierr; 2492d0f46423SBarry Smith PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2493387bc808SHong Zhang PetscScalar *array; 2494a30f8f8cSSatish Balay 2495a30f8f8cSSatish Balay PetscFunctionBegin; 2496a30f8f8cSSatish Balay *newmat = 0; 249726fbe8dcSKarl Rupp 2498ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2499d0f46423SBarry Smith ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 25007adad957SLisandro Dalcin ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 25011e1e43feSBarry Smith ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 25021e1e43feSBarry Smith ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2503e1b6402fSHong Zhang 2504d5f3da31SBarry Smith mat->factortype = matin->factortype; 2505273d9f13SBarry Smith mat->preallocated = PETSC_TRUE; 250682327fa8SHong Zhang mat->assembled = PETSC_TRUE; 25077fff6886SHong Zhang mat->insertmode = NOT_SET_VALUES; 25087fff6886SHong Zhang 2509b5df2d14SHong Zhang a = (Mat_MPISBAIJ*)mat->data; 2510a30f8f8cSSatish Balay a->bs2 = oldmat->bs2; 2511a30f8f8cSSatish Balay a->mbs = oldmat->mbs; 2512a30f8f8cSSatish Balay a->nbs = oldmat->nbs; 2513a30f8f8cSSatish Balay a->Mbs = oldmat->Mbs; 2514a30f8f8cSSatish Balay a->Nbs = oldmat->Nbs; 2515a30f8f8cSSatish Balay 2516a30f8f8cSSatish Balay a->size = oldmat->size; 2517a30f8f8cSSatish Balay a->rank = oldmat->rank; 2518a30f8f8cSSatish Balay a->donotstash = oldmat->donotstash; 2519a30f8f8cSSatish Balay a->roworiented = oldmat->roworiented; 2520a30f8f8cSSatish Balay a->rowindices = 0; 2521a30f8f8cSSatish Balay a->rowvalues = 0; 2522a30f8f8cSSatish Balay a->getrowactive = PETSC_FALSE; 2523a30f8f8cSSatish Balay a->barray = 0; 2524899cda47SBarry Smith a->rstartbs = oldmat->rstartbs; 2525899cda47SBarry Smith a->rendbs = oldmat->rendbs; 2526899cda47SBarry Smith a->cstartbs = oldmat->cstartbs; 2527899cda47SBarry Smith a->cendbs = oldmat->cendbs; 2528a30f8f8cSSatish Balay 2529a30f8f8cSSatish Balay /* hash table stuff */ 2530a30f8f8cSSatish Balay a->ht = 0; 2531a30f8f8cSSatish Balay a->hd = 0; 2532a30f8f8cSSatish Balay a->ht_size = 0; 2533a30f8f8cSSatish Balay a->ht_flag = oldmat->ht_flag; 2534a30f8f8cSSatish Balay a->ht_fact = oldmat->ht_fact; 2535a30f8f8cSSatish Balay a->ht_total_ct = 0; 2536a30f8f8cSSatish Balay a->ht_insert_ct = 0; 2537a30f8f8cSSatish Balay 2538580bdb30SBarry Smith ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2);CHKERRQ(ierr); 2539a30f8f8cSSatish Balay if (oldmat->colmap) { 2540a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE) 2541a30f8f8cSSatish Balay ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2542a30f8f8cSSatish Balay #else 2543854ce69bSBarry Smith ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 25443bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2545580bdb30SBarry Smith ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr); 2546a30f8f8cSSatish Balay #endif 2547a30f8f8cSSatish Balay } else a->colmap = 0; 2548387bc808SHong Zhang 2549a30f8f8cSSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2550785e854fSJed Brown ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 25513bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2552580bdb30SBarry Smith ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); 2553a30f8f8cSSatish Balay } else a->garray = 0; 2554a30f8f8cSSatish Balay 2555ce94432eSBarry Smith ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2556a30f8f8cSSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 25573bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2558a30f8f8cSSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 25593bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 256082327fa8SHong Zhang 256182327fa8SHong Zhang ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 25623bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 256382327fa8SHong Zhang ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 25643bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2565387bc808SHong Zhang 2566387bc808SHong Zhang ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 25671ebc52fbSHong Zhang ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2568778a2246SBarry Smith ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2569778a2246SBarry Smith ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 25701ebc52fbSHong Zhang ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 25711ebc52fbSHong Zhang ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2572778a2246SBarry Smith ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 25731ebc52fbSHong Zhang ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 25743bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 25753bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 25763bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 25773bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 25783bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2579387bc808SHong Zhang 2580387bc808SHong Zhang /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2581387bc808SHong Zhang ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2582387bc808SHong Zhang a->sMvctx = oldmat->sMvctx; 25833bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 258482327fa8SHong Zhang 2585a30f8f8cSSatish Balay ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 25863bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2587a30f8f8cSSatish Balay ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 25883bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2589140e18c1SBarry Smith ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2590a30f8f8cSSatish Balay *newmat = mat; 2591a30f8f8cSSatish Balay PetscFunctionReturn(0); 2592a30f8f8cSSatish Balay } 2593a30f8f8cSSatish Balay 2594*618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */ 2595*618cc2edSLisandro Dalcin #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary 2596*618cc2edSLisandro Dalcin 2597*618cc2edSLisandro Dalcin PetscErrorCode MatLoad_MPISBAIJ(Mat mat,PetscViewer viewer) 259895936485SShri Abhyankar { 259995936485SShri Abhyankar PetscErrorCode ierr; 26007f489da9SVaclav Hapla PetscBool isbinary; 260195936485SShri Abhyankar 260295936485SShri Abhyankar PetscFunctionBegin; 26037f489da9SVaclav Hapla ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2604*618cc2edSLisandro Dalcin if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 2605*618cc2edSLisandro Dalcin ierr = MatLoad_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 260695936485SShri Abhyankar PetscFunctionReturn(0); 260795936485SShri Abhyankar } 260895936485SShri Abhyankar 2609dcf5cc72SBarry Smith /*XXXXX@ 2610a30f8f8cSSatish Balay MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2611a30f8f8cSSatish Balay 2612a30f8f8cSSatish Balay Input Parameters: 2613a30f8f8cSSatish Balay . mat - the matrix 2614a30f8f8cSSatish Balay . fact - factor 2615a30f8f8cSSatish Balay 2616c5eb9154SBarry Smith Not Collective on Mat, each process can have a different hash factor 2617a30f8f8cSSatish Balay 2618a30f8f8cSSatish Balay Level: advanced 2619a30f8f8cSSatish Balay 2620a30f8f8cSSatish Balay Notes: 2621a30f8f8cSSatish Balay This can also be set by the command line option: -mat_use_hash_table fact 2622a30f8f8cSSatish Balay 2623a30f8f8cSSatish Balay .seealso: MatSetOption() 2624dcf5cc72SBarry Smith @XXXXX*/ 2625dcf5cc72SBarry Smith 262624d5174aSHong Zhang 2627985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 262824d5174aSHong Zhang { 262924d5174aSHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2630f4c0e9e4SHong Zhang Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2631ca54ac64SHong Zhang PetscReal atmp; 263287828ca2SBarry Smith PetscReal *work,*svalues,*rvalues; 2633dfbe8321SBarry Smith PetscErrorCode ierr; 26341302d50aSBarry Smith PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 26351302d50aSBarry Smith PetscMPIInt rank,size; 26361302d50aSBarry Smith PetscInt *rowners_bs,dest,count,source; 263787828ca2SBarry Smith PetscScalar *va; 26388a1c53f2SBarry Smith MatScalar *ba; 2639f4c0e9e4SHong Zhang MPI_Status stat; 264024d5174aSHong Zhang 264124d5174aSHong Zhang PetscFunctionBegin; 2642e32f2f54SBarry Smith if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 26430298fd71SBarry Smith ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 26441ebc52fbSHong Zhang ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2645f4c0e9e4SHong Zhang 2646ce94432eSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2647ce94432eSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2648f4c0e9e4SHong Zhang 2649d0f46423SBarry Smith bs = A->rmap->bs; 2650f4c0e9e4SHong Zhang mbs = a->mbs; 2651f4c0e9e4SHong Zhang Mbs = a->Mbs; 2652f4c0e9e4SHong Zhang ba = b->a; 2653f4c0e9e4SHong Zhang bi = b->i; 2654f4c0e9e4SHong Zhang bj = b->j; 2655f4c0e9e4SHong Zhang 2656f4c0e9e4SHong Zhang /* find ownerships */ 2657d0f46423SBarry Smith rowners_bs = A->rmap->range; 2658f4c0e9e4SHong Zhang 2659f4c0e9e4SHong Zhang /* each proc creates an array to be distributed */ 2660580bdb30SBarry Smith ierr = PetscCalloc1(bs*Mbs,&work);CHKERRQ(ierr); 2661f4c0e9e4SHong Zhang 2662f4c0e9e4SHong Zhang /* row_max for B */ 2663b8475685SHong Zhang if (rank != size-1) { 2664f4c0e9e4SHong Zhang for (i=0; i<mbs; i++) { 2665f4c0e9e4SHong Zhang ncols = bi[1] - bi[0]; bi++; 2666f4c0e9e4SHong Zhang brow = bs*i; 2667f4c0e9e4SHong Zhang for (j=0; j<ncols; j++) { 2668f4c0e9e4SHong Zhang bcol = bs*(*bj); 2669f4c0e9e4SHong Zhang for (kcol=0; kcol<bs; kcol++) { 2670ca54ac64SHong Zhang col = bcol + kcol; /* local col index */ 267104d41228SHong Zhang col += rowners_bs[rank+1]; /* global col index */ 2672f4c0e9e4SHong Zhang for (krow=0; krow<bs; krow++) { 2673f4c0e9e4SHong Zhang atmp = PetscAbsScalar(*ba); ba++; 2674ca54ac64SHong Zhang row = brow + krow; /* local row index */ 2675ca54ac64SHong Zhang if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2676f4c0e9e4SHong Zhang if (work[col] < atmp) work[col] = atmp; 2677f4c0e9e4SHong Zhang } 2678f4c0e9e4SHong Zhang } 2679f4c0e9e4SHong Zhang bj++; 2680f4c0e9e4SHong Zhang } 2681f4c0e9e4SHong Zhang } 2682f4c0e9e4SHong Zhang 2683f4c0e9e4SHong Zhang /* send values to its owners */ 2684f4c0e9e4SHong Zhang for (dest=rank+1; dest<size; dest++) { 2685f4c0e9e4SHong Zhang svalues = work + rowners_bs[dest]; 2686ca54ac64SHong Zhang count = rowners_bs[dest+1]-rowners_bs[dest]; 2687ce94432eSBarry Smith ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2688ca54ac64SHong Zhang } 2689f4c0e9e4SHong Zhang } 2690f4c0e9e4SHong Zhang 2691f4c0e9e4SHong Zhang /* receive values */ 2692ca54ac64SHong Zhang if (rank) { 2693f4c0e9e4SHong Zhang rvalues = work; 2694ca54ac64SHong Zhang count = rowners_bs[rank+1]-rowners_bs[rank]; 2695f4c0e9e4SHong Zhang for (source=0; source<rank; source++) { 2696ce94432eSBarry Smith ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 2697f4c0e9e4SHong Zhang /* process values */ 2698f4c0e9e4SHong Zhang for (i=0; i<count; i++) { 2699ca54ac64SHong Zhang if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2700f4c0e9e4SHong Zhang } 2701f4c0e9e4SHong Zhang } 2702ca54ac64SHong Zhang } 2703f4c0e9e4SHong Zhang 27041ebc52fbSHong Zhang ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2705ac355199SBarry Smith ierr = PetscFree(work);CHKERRQ(ierr); 270624d5174aSHong Zhang PetscFunctionReturn(0); 270724d5174aSHong Zhang } 27082798e883SHong Zhang 270941f059aeSBarry Smith PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 27102798e883SHong Zhang { 27112798e883SHong Zhang Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2712dfbe8321SBarry Smith PetscErrorCode ierr; 2713d0f46423SBarry Smith PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 27143649974fSBarry Smith PetscScalar *x,*ptr,*from; 2715ffe4fb16SHong Zhang Vec bb1; 27163649974fSBarry Smith const PetscScalar *b; 2717ffe4fb16SHong Zhang 2718ffe4fb16SHong Zhang PetscFunctionBegin; 2719e32f2f54SBarry Smith if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2720e32f2f54SBarry Smith if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2721ffe4fb16SHong Zhang 2722a2b30743SBarry Smith if (flag == SOR_APPLY_UPPER) { 272341f059aeSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2724a2b30743SBarry Smith PetscFunctionReturn(0); 2725a2b30743SBarry Smith } 2726a2b30743SBarry Smith 2727ffe4fb16SHong Zhang if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2728ffe4fb16SHong Zhang if (flag & SOR_ZERO_INITIAL_GUESS) { 272941f059aeSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2730ffe4fb16SHong Zhang its--; 2731ffe4fb16SHong Zhang } 2732ffe4fb16SHong Zhang 2733ffe4fb16SHong Zhang ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2734ffe4fb16SHong Zhang while (its--) { 2735ffe4fb16SHong Zhang 2736ffe4fb16SHong Zhang /* lower triangular part: slvec0b = - B^T*xx */ 2737ffe4fb16SHong Zhang ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2738ffe4fb16SHong Zhang 2739ffe4fb16SHong Zhang /* copy xx into slvec0a */ 27401ebc52fbSHong Zhang ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 27411ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2742580bdb30SBarry Smith ierr = PetscArraycpy(ptr,x,bs*mbs);CHKERRQ(ierr); 27431ebc52fbSHong Zhang ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2744ffe4fb16SHong Zhang 2745efb30889SBarry Smith ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2746ffe4fb16SHong Zhang 2747ffe4fb16SHong Zhang /* copy bb into slvec1a */ 27481ebc52fbSHong Zhang ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 27493649974fSBarry Smith ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2750580bdb30SBarry Smith ierr = PetscArraycpy(ptr,b,bs*mbs);CHKERRQ(ierr); 27511ebc52fbSHong Zhang ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2752ffe4fb16SHong Zhang 2753ffe4fb16SHong Zhang /* set slvec1b = 0 */ 2754fa22f6d0SBarry Smith ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2755ffe4fb16SHong Zhang 2756ca9f406cSSatish Balay ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 27571ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2758a8b09249SBarry Smith ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2759ca9f406cSSatish Balay ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2760ffe4fb16SHong Zhang 2761ffe4fb16SHong Zhang /* upper triangular part: bb1 = bb1 - B*x */ 2762ffe4fb16SHong Zhang ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2763ffe4fb16SHong Zhang 2764ffe4fb16SHong Zhang /* local diagonal sweep */ 276541f059aeSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2766ffe4fb16SHong Zhang } 27676bf464f9SBarry Smith ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2768fa22f6d0SBarry Smith } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 276941f059aeSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2770fa22f6d0SBarry Smith } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 277141f059aeSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2772fa22f6d0SBarry Smith } else if (flag & SOR_EISENSTAT) { 2773fa22f6d0SBarry Smith Vec xx1; 2774ace3abfcSBarry Smith PetscBool hasop; 277520f1ed55SBarry Smith const PetscScalar *diag; 2776887ee2caSBarry Smith PetscScalar *sl,scale = (omega - 2.0)/omega; 277720f1ed55SBarry Smith PetscInt i,n; 2778fa22f6d0SBarry Smith 2779fa22f6d0SBarry Smith if (!mat->xx1) { 2780fa22f6d0SBarry Smith ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2781fa22f6d0SBarry Smith ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2782fa22f6d0SBarry Smith } 2783fa22f6d0SBarry Smith xx1 = mat->xx1; 2784fa22f6d0SBarry Smith bb1 = mat->bb1; 2785fa22f6d0SBarry Smith 278641f059aeSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2787fa22f6d0SBarry Smith 2788fa22f6d0SBarry Smith if (!mat->diag) { 2789effcda25SBarry Smith /* this is wrong for same matrix with new nonzero values */ 27902a7a6963SBarry Smith ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 2791fa22f6d0SBarry Smith ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2792fa22f6d0SBarry Smith } 2793fa22f6d0SBarry Smith ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2794fa22f6d0SBarry Smith 2795fa22f6d0SBarry Smith if (hasop) { 2796fa22f6d0SBarry Smith ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2797887ee2caSBarry Smith ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 279820f1ed55SBarry Smith } else { 279920f1ed55SBarry Smith /* 280020f1ed55SBarry Smith These two lines are replaced by code that may be a bit faster for a good compiler 280120f1ed55SBarry Smith ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2802887ee2caSBarry Smith ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 280320f1ed55SBarry Smith */ 280420f1ed55SBarry Smith ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 28053649974fSBarry Smith ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 28063649974fSBarry Smith ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 280720f1ed55SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 280820f1ed55SBarry Smith ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2809887ee2caSBarry Smith if (omega == 1.0) { 281026fbe8dcSKarl Rupp for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 281120f1ed55SBarry Smith ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2812887ee2caSBarry Smith } else { 281326fbe8dcSKarl Rupp for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 2814887ee2caSBarry Smith ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2815887ee2caSBarry Smith } 281620f1ed55SBarry Smith ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 28173649974fSBarry Smith ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 28183649974fSBarry Smith ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 281920f1ed55SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 282020f1ed55SBarry Smith } 2821fa22f6d0SBarry Smith 2822fa22f6d0SBarry Smith /* multiply off-diagonal portion of matrix */ 2823fa22f6d0SBarry Smith ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2824fa22f6d0SBarry Smith ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2825fa22f6d0SBarry Smith ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2826fa22f6d0SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2827580bdb30SBarry Smith ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 2828fa22f6d0SBarry Smith ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2829fa22f6d0SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2830fa22f6d0SBarry Smith ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2831fa22f6d0SBarry Smith ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2832effcda25SBarry Smith ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2833fa22f6d0SBarry Smith 2834fa22f6d0SBarry Smith /* local sweep */ 283541f059aeSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2836fa22f6d0SBarry Smith ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2837f23aa3ddSBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2838ffe4fb16SHong Zhang PetscFunctionReturn(0); 2839ffe4fb16SHong Zhang } 2840ffe4fb16SHong Zhang 284141f059aeSBarry Smith PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2842ffe4fb16SHong Zhang { 2843ffe4fb16SHong Zhang Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2844dfbe8321SBarry Smith PetscErrorCode ierr; 28452798e883SHong Zhang Vec lvec1,bb1; 28462798e883SHong Zhang 28472798e883SHong Zhang PetscFunctionBegin; 2848e32f2f54SBarry Smith if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2849e32f2f54SBarry Smith if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 28502798e883SHong Zhang 2851c14dc6b6SHong Zhang if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 28522798e883SHong Zhang if (flag & SOR_ZERO_INITIAL_GUESS) { 285341f059aeSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 28542798e883SHong Zhang its--; 28552798e883SHong Zhang } 28562798e883SHong Zhang 28572798e883SHong Zhang ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 28582798e883SHong Zhang ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 28592798e883SHong Zhang while (its--) { 2860ca9f406cSSatish Balay ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 28612798e883SHong Zhang 28622798e883SHong Zhang /* lower diagonal part: bb1 = bb - B^T*xx */ 28632798e883SHong Zhang ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2864efb30889SBarry Smith ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 28652798e883SHong Zhang 2866ca9f406cSSatish Balay ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 28672798e883SHong Zhang ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2868ca9f406cSSatish Balay ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 28692798e883SHong Zhang 28702798e883SHong Zhang /* upper diagonal part: bb1 = bb1 - B*x */ 2871efb30889SBarry Smith ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 28722798e883SHong Zhang ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 28732798e883SHong Zhang 2874ca9f406cSSatish Balay ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 28752798e883SHong Zhang 2876c14dc6b6SHong Zhang /* diagonal sweep */ 287741f059aeSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 28782798e883SHong Zhang } 28796bf464f9SBarry Smith ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 28806bf464f9SBarry Smith ierr = VecDestroy(&bb1);CHKERRQ(ierr); 28816bf464f9SBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 28822798e883SHong Zhang PetscFunctionReturn(0); 28832798e883SHong Zhang } 28842798e883SHong Zhang 2885dfb205c3SBarry Smith /*@ 2886dfb205c3SBarry Smith MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard 2887dfb205c3SBarry Smith CSR format the local rows. 2888dfb205c3SBarry Smith 2889d083f849SBarry Smith Collective 2890dfb205c3SBarry Smith 2891dfb205c3SBarry Smith Input Parameters: 2892dfb205c3SBarry Smith + comm - MPI communicator 2893dfb205c3SBarry Smith . bs - the block size, only a block size of 1 is supported 2894dfb205c3SBarry Smith . m - number of local rows (Cannot be PETSC_DECIDE) 2895dfb205c3SBarry Smith . n - This value should be the same as the local size used in creating the 2896dfb205c3SBarry Smith x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2897dfb205c3SBarry Smith calculated if N is given) For square matrices n is almost always m. 2898dfb205c3SBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2899dfb205c3SBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2900483a2f95SBarry Smith . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix 2901dfb205c3SBarry Smith . j - column indices 2902dfb205c3SBarry Smith - a - matrix values 2903dfb205c3SBarry Smith 2904dfb205c3SBarry Smith Output Parameter: 2905dfb205c3SBarry Smith . mat - the matrix 2906dfb205c3SBarry Smith 2907dfb205c3SBarry Smith Level: intermediate 2908dfb205c3SBarry Smith 2909dfb205c3SBarry Smith Notes: 2910dfb205c3SBarry Smith The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2911dfb205c3SBarry Smith thus you CANNOT change the matrix entries by changing the values of a[] after you have 2912dfb205c3SBarry Smith called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2913dfb205c3SBarry Smith 2914dfb205c3SBarry Smith The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 2915dfb205c3SBarry Smith 2916dfb205c3SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 291769b1f4b7SBarry Smith MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 2918dfb205c3SBarry Smith @*/ 29197087cfbeSBarry Smith PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2920dfb205c3SBarry Smith { 2921dfb205c3SBarry Smith PetscErrorCode ierr; 2922dfb205c3SBarry Smith 2923dfb205c3SBarry Smith 2924dfb205c3SBarry Smith PetscFunctionBegin; 2925f23aa3ddSBarry Smith if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2926dfb205c3SBarry Smith if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2927dfb205c3SBarry Smith ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2928dfb205c3SBarry Smith ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 2929dfb205c3SBarry Smith ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 2930dfb205c3SBarry Smith ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 2931dfb205c3SBarry Smith PetscFunctionReturn(0); 2932dfb205c3SBarry Smith } 2933dfb205c3SBarry Smith 2934dfb205c3SBarry Smith 2935dfb205c3SBarry Smith /*@C 2936664954b6SBarry Smith MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values 2937dfb205c3SBarry Smith 2938d083f849SBarry Smith Collective 2939dfb205c3SBarry Smith 2940dfb205c3SBarry Smith Input Parameters: 29411c4f3114SJed Brown + B - the matrix 2942dfb205c3SBarry Smith . bs - the block size 2943dfb205c3SBarry Smith . i - the indices into j for the start of each local row (starts with zero) 2944dfb205c3SBarry Smith . j - the column indices for each local row (starts with zero) these must be sorted for each row 2945dfb205c3SBarry Smith - v - optional values in the matrix 2946dfb205c3SBarry Smith 2947664954b6SBarry Smith Level: advanced 2948664954b6SBarry Smith 2949664954b6SBarry Smith Notes: 29500cd7f59aSBarry Smith Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries 29510cd7f59aSBarry Smith and usually the numerical values as well 29520cd7f59aSBarry Smith 295350c5228eSBarry Smith Any entries below the diagonal are ignored 2954dfb205c3SBarry Smith 295569b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 2956dfb205c3SBarry Smith @*/ 29577087cfbeSBarry Smith PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2958dfb205c3SBarry Smith { 29594ac538c5SBarry Smith PetscErrorCode ierr; 2960dfb205c3SBarry Smith 2961dfb205c3SBarry Smith PetscFunctionBegin; 29624ac538c5SBarry Smith ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2963dfb205c3SBarry Smith PetscFunctionReturn(0); 2964dfb205c3SBarry Smith } 2965dfb205c3SBarry Smith 296610c56fdeSHong Zhang PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 29674dcd73b1SHong Zhang { 29684dcd73b1SHong Zhang PetscErrorCode ierr; 296910c56fdeSHong Zhang PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 297010c56fdeSHong Zhang PetscInt *indx; 297110c56fdeSHong Zhang PetscScalar *values; 2972dfb205c3SBarry Smith 29734dcd73b1SHong Zhang PetscFunctionBegin; 29744dcd73b1SHong Zhang ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 297510c56fdeSHong Zhang if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 297610c56fdeSHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data; 2977de25e9cbSPierre Jolivet PetscInt *dnz,*onz,mbs,Nbs,nbs; 297810c56fdeSHong Zhang PetscInt *bindx,rmax=a->rmax,j; 2979de25e9cbSPierre Jolivet PetscMPIInt rank,size; 29804dcd73b1SHong Zhang 298110c56fdeSHong Zhang ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 298210c56fdeSHong Zhang mbs = m/bs; Nbs = N/cbs; 298310c56fdeSHong Zhang if (n == PETSC_DECIDE) { 2984da91a574SPierre Jolivet ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N); 298510c56fdeSHong Zhang } 2986da91a574SPierre Jolivet nbs = n/cbs; 29874dcd73b1SHong Zhang 29884dcd73b1SHong Zhang ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 2989de25e9cbSPierre Jolivet ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */ 2990de25e9cbSPierre Jolivet 2991de25e9cbSPierre Jolivet ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2992de25e9cbSPierre Jolivet ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr); 2993de25e9cbSPierre Jolivet if (rank == size-1) { 2994de25e9cbSPierre Jolivet /* Check sum(nbs) = Nbs */ 2995de25e9cbSPierre Jolivet if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs); 2996de25e9cbSPierre Jolivet } 2997de25e9cbSPierre Jolivet 2998de25e9cbSPierre Jolivet rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */ 2999e491d569SHong Zhang ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 300010c56fdeSHong Zhang for (i=0; i<mbs; i++) { 30014dcd73b1SHong Zhang ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 30024dcd73b1SHong Zhang nnz = nnz/bs; 30034dcd73b1SHong Zhang for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 30044dcd73b1SHong Zhang ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 30054dcd73b1SHong Zhang ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 30064dcd73b1SHong Zhang } 3007e491d569SHong Zhang ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 30084dcd73b1SHong Zhang ierr = PetscFree(bindx);CHKERRQ(ierr); 30094dcd73b1SHong Zhang 30104dcd73b1SHong Zhang ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3011de25e9cbSPierre Jolivet ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 30124dcd73b1SHong Zhang ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3013ce94fc41SPierre Jolivet ierr = MatSetType(*outmat,MATSBAIJ);CHKERRQ(ierr); 3014ce94fc41SPierre Jolivet ierr = MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr); 30154dcd73b1SHong Zhang ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 30164dcd73b1SHong Zhang ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 30174dcd73b1SHong Zhang } 30184dcd73b1SHong Zhang 301910c56fdeSHong Zhang /* numeric phase */ 30204dcd73b1SHong Zhang ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 302110c56fdeSHong Zhang ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 30224dcd73b1SHong Zhang 3023e491d569SHong Zhang ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 30244dcd73b1SHong Zhang for (i=0; i<m; i++) { 30254dcd73b1SHong Zhang ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 30264dcd73b1SHong Zhang Ii = i + rstart; 302710c56fdeSHong Zhang ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 30284dcd73b1SHong Zhang ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 30294dcd73b1SHong Zhang } 3030e491d569SHong Zhang ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 303110c56fdeSHong Zhang ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 303210c56fdeSHong Zhang ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 30334dcd73b1SHong Zhang PetscFunctionReturn(0); 30344dcd73b1SHong Zhang } 3035