1a30f8f8cSSatish Balay 2c6db04a5SJed Brown #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3c6db04a5SJed Brown #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h> 5c6db04a5SJed Brown #include <petscblaslapack.h> 6a30f8f8cSSatish Balay 76214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 8cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 96214f412SHong Zhang #endif 10*d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK) 11*d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 12*d24d4204SJose E. Roman #endif 13b147fbf3SStefano Zampini 14b147fbf3SStefano Zampini /* This could be moved to matimpl.h */ 15b147fbf3SStefano Zampini static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill) 16b147fbf3SStefano Zampini { 17b147fbf3SStefano Zampini Mat preallocator; 18b147fbf3SStefano Zampini PetscInt r,rstart,rend; 19b147fbf3SStefano Zampini PetscInt bs,i,m,n,M,N; 20b147fbf3SStefano Zampini PetscBool cong = PETSC_TRUE; 21b147fbf3SStefano Zampini PetscErrorCode ierr; 22b147fbf3SStefano Zampini 23b147fbf3SStefano Zampini PetscFunctionBegin; 24b147fbf3SStefano Zampini PetscValidHeaderSpecific(B,MAT_CLASSID,1); 25b147fbf3SStefano Zampini PetscValidLogicalCollectiveInt(B,nm,2); 26b147fbf3SStefano Zampini for (i = 0; i < nm; i++) { 27b147fbf3SStefano Zampini PetscValidHeaderSpecific(X[i],MAT_CLASSID,3); 28b147fbf3SStefano Zampini ierr = PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);CHKERRQ(ierr); 29b147fbf3SStefano Zampini if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts"); 30b147fbf3SStefano Zampini } 31b147fbf3SStefano Zampini PetscValidLogicalCollectiveBool(B,fill,5); 32b147fbf3SStefano Zampini ierr = MatGetBlockSize(B,&bs);CHKERRQ(ierr); 33b147fbf3SStefano Zampini ierr = MatGetSize(B,&M,&N);CHKERRQ(ierr); 34b147fbf3SStefano Zampini ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr); 35b147fbf3SStefano Zampini ierr = MatCreate(PetscObjectComm((PetscObject)B),&preallocator);CHKERRQ(ierr); 36b147fbf3SStefano Zampini ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr); 37b147fbf3SStefano Zampini ierr = MatSetBlockSize(preallocator,bs);CHKERRQ(ierr); 38b147fbf3SStefano Zampini ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr); 39b147fbf3SStefano Zampini ierr = MatSetUp(preallocator);CHKERRQ(ierr); 40b147fbf3SStefano Zampini ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr); 41b147fbf3SStefano Zampini for (r = rstart; r < rend; ++r) { 42b147fbf3SStefano Zampini PetscInt ncols; 43b147fbf3SStefano Zampini const PetscInt *row; 44b147fbf3SStefano Zampini const PetscScalar *vals; 45b147fbf3SStefano Zampini 46b147fbf3SStefano Zampini for (i = 0; i < nm; i++) { 47b147fbf3SStefano Zampini ierr = MatGetRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr); 48b147fbf3SStefano Zampini ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr); 49b147fbf3SStefano Zampini if (symm && symm[i]) { 50b147fbf3SStefano Zampini ierr = MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr); 51b147fbf3SStefano Zampini } 52b147fbf3SStefano Zampini ierr = MatRestoreRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr); 53b147fbf3SStefano Zampini } 54b147fbf3SStefano Zampini } 55b147fbf3SStefano Zampini ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 56b147fbf3SStefano Zampini ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 57b147fbf3SStefano Zampini ierr = MatPreallocatorPreallocate(preallocator,fill,B);CHKERRQ(ierr); 58b147fbf3SStefano Zampini ierr = MatDestroy(&preallocator);CHKERRQ(ierr); 59b147fbf3SStefano Zampini PetscFunctionReturn(0); 60b147fbf3SStefano Zampini } 61b147fbf3SStefano Zampini 6228d58a37SPierre Jolivet PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 63b147fbf3SStefano Zampini { 64b147fbf3SStefano Zampini Mat B; 65b147fbf3SStefano Zampini PetscErrorCode ierr; 66b147fbf3SStefano Zampini PetscInt r; 67b147fbf3SStefano Zampini 68b147fbf3SStefano Zampini PetscFunctionBegin; 69b147fbf3SStefano Zampini if (reuse != MAT_REUSE_MATRIX) { 7028d58a37SPierre Jolivet PetscBool symm = PETSC_TRUE,isdense; 71b147fbf3SStefano Zampini PetscInt bs; 72b147fbf3SStefano Zampini 73b147fbf3SStefano Zampini ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 74b147fbf3SStefano Zampini ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 75b147fbf3SStefano Zampini ierr = MatSetType(B,newtype);CHKERRQ(ierr); 76b147fbf3SStefano Zampini ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 77b147fbf3SStefano Zampini ierr = MatSetBlockSize(B,bs);CHKERRQ(ierr); 78b147fbf3SStefano Zampini ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 79b147fbf3SStefano Zampini ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 8028d58a37SPierre Jolivet ierr = PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,"");CHKERRQ(ierr); 8128d58a37SPierre Jolivet if (!isdense) { 82b147fbf3SStefano Zampini ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 83b147fbf3SStefano Zampini ierr = MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);CHKERRQ(ierr); 84b147fbf3SStefano Zampini ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 8528d58a37SPierre Jolivet } else { 8628d58a37SPierre Jolivet ierr = MatSetUp(B);CHKERRQ(ierr); 8728d58a37SPierre Jolivet } 8828d58a37SPierre Jolivet } else { 8928d58a37SPierre Jolivet B = *newmat; 9028d58a37SPierre Jolivet ierr = MatZeroEntries(B);CHKERRQ(ierr); 9128d58a37SPierre Jolivet } 92b147fbf3SStefano Zampini 93b147fbf3SStefano Zampini ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 94b147fbf3SStefano Zampini for (r = A->rmap->rstart; r < A->rmap->rend; r++) { 95b147fbf3SStefano Zampini PetscInt ncols; 96b147fbf3SStefano Zampini const PetscInt *row; 97b147fbf3SStefano Zampini const PetscScalar *vals; 98b147fbf3SStefano Zampini 99b147fbf3SStefano Zampini ierr = MatGetRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr); 100b147fbf3SStefano Zampini ierr = MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr); 101eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX) 102eb1ec7c1SStefano Zampini if (A->hermitian) { 103eb1ec7c1SStefano Zampini PetscInt i; 104eb1ec7c1SStefano Zampini for (i = 0; i < ncols; i++) { 105eb1ec7c1SStefano Zampini ierr = MatSetValue(B,row[i],r,PetscConj(vals[i]),INSERT_VALUES);CHKERRQ(ierr); 106eb1ec7c1SStefano Zampini } 107eb1ec7c1SStefano Zampini } else { 108b147fbf3SStefano Zampini ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr); 109eb1ec7c1SStefano Zampini } 110eb1ec7c1SStefano Zampini #else 111eb1ec7c1SStefano Zampini ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr); 112eb1ec7c1SStefano Zampini #endif 113b147fbf3SStefano Zampini ierr = MatRestoreRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr); 114b147fbf3SStefano Zampini } 115b147fbf3SStefano Zampini ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 116b147fbf3SStefano Zampini ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 117b147fbf3SStefano Zampini ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 118b147fbf3SStefano Zampini 119b147fbf3SStefano Zampini if (reuse == MAT_INPLACE_MATRIX) { 120b147fbf3SStefano Zampini ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 121b147fbf3SStefano Zampini } else { 122b147fbf3SStefano Zampini *newmat = B; 123b147fbf3SStefano Zampini } 124b147fbf3SStefano Zampini PetscFunctionReturn(0); 125b147fbf3SStefano Zampini } 126b147fbf3SStefano Zampini 1277087cfbeSBarry Smith PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 128a30f8f8cSSatish Balay { 129f3566a2aSHong Zhang Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 130dfbe8321SBarry Smith PetscErrorCode ierr; 131a30f8f8cSSatish Balay 132a30f8f8cSSatish Balay PetscFunctionBegin; 133a30f8f8cSSatish Balay ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 134a30f8f8cSSatish Balay ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 135a30f8f8cSSatish Balay PetscFunctionReturn(0); 136a30f8f8cSSatish Balay } 137a30f8f8cSSatish Balay 1387087cfbeSBarry Smith PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 139a30f8f8cSSatish Balay { 140f3566a2aSHong Zhang Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 141dfbe8321SBarry Smith PetscErrorCode ierr; 142a30f8f8cSSatish Balay 143a30f8f8cSSatish Balay PetscFunctionBegin; 144a30f8f8cSSatish Balay ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 145a30f8f8cSSatish Balay ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 146a30f8f8cSSatish Balay PetscFunctionReturn(0); 147a30f8f8cSSatish Balay } 148a30f8f8cSSatish Balay 149d40312a9SBarry Smith #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 150a30f8f8cSSatish Balay { \ 151a30f8f8cSSatish Balay brow = row/bs; \ 152a30f8f8cSSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 153a30f8f8cSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 154a30f8f8cSSatish Balay bcol = col/bs; \ 155a30f8f8cSSatish Balay ridx = row % bs; cidx = col % bs; \ 156a30f8f8cSSatish Balay low = 0; high = nrow; \ 157a30f8f8cSSatish Balay while (high-low > 3) { \ 158a30f8f8cSSatish Balay t = (low+high)/2; \ 159a30f8f8cSSatish Balay if (rp[t] > bcol) high = t; \ 160a30f8f8cSSatish Balay else low = t; \ 161a30f8f8cSSatish Balay } \ 162a30f8f8cSSatish Balay for (_i=low; _i<high; _i++) { \ 163a30f8f8cSSatish Balay if (rp[_i] > bcol) break; \ 164a30f8f8cSSatish Balay if (rp[_i] == bcol) { \ 165a30f8f8cSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 166a30f8f8cSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 167a30f8f8cSSatish Balay else *bap = value; \ 168a30f8f8cSSatish Balay goto a_noinsert; \ 169a30f8f8cSSatish Balay } \ 170a30f8f8cSSatish Balay } \ 171a30f8f8cSSatish Balay if (a->nonew == 1) goto a_noinsert; \ 172d40312a9SBarry Smith if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 173fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 174a30f8f8cSSatish Balay N = nrow++ - 1; \ 175a30f8f8cSSatish Balay /* shift up all the later entries in this row */ \ 176580bdb30SBarry Smith ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \ 177580bdb30SBarry Smith ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \ 178580bdb30SBarry Smith ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \ 179a30f8f8cSSatish Balay rp[_i] = bcol; \ 180a30f8f8cSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 181e56f5c9eSBarry Smith A->nonzerostate++;\ 182a30f8f8cSSatish Balay a_noinsert:; \ 183a30f8f8cSSatish Balay ailen[brow] = nrow; \ 184a30f8f8cSSatish Balay } 185e5e170daSBarry Smith 186d40312a9SBarry Smith #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 187a30f8f8cSSatish Balay { \ 188a30f8f8cSSatish Balay brow = row/bs; \ 189a30f8f8cSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 190a30f8f8cSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 191a30f8f8cSSatish Balay bcol = col/bs; \ 192a30f8f8cSSatish Balay ridx = row % bs; cidx = col % bs; \ 193a30f8f8cSSatish Balay low = 0; high = nrow; \ 194a30f8f8cSSatish Balay while (high-low > 3) { \ 195a30f8f8cSSatish Balay t = (low+high)/2; \ 196a30f8f8cSSatish Balay if (rp[t] > bcol) high = t; \ 197a30f8f8cSSatish Balay else low = t; \ 198a30f8f8cSSatish Balay } \ 199a30f8f8cSSatish Balay for (_i=low; _i<high; _i++) { \ 200a30f8f8cSSatish Balay if (rp[_i] > bcol) break; \ 201a30f8f8cSSatish Balay if (rp[_i] == bcol) { \ 202a30f8f8cSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 203a30f8f8cSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 204a30f8f8cSSatish Balay else *bap = value; \ 205a30f8f8cSSatish Balay goto b_noinsert; \ 206a30f8f8cSSatish Balay } \ 207a30f8f8cSSatish Balay } \ 208a30f8f8cSSatish Balay if (b->nonew == 1) goto b_noinsert; \ 209d40312a9SBarry Smith if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 210fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 211a30f8f8cSSatish Balay N = nrow++ - 1; \ 212a30f8f8cSSatish Balay /* shift up all the later entries in this row */ \ 213580bdb30SBarry Smith ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \ 214580bdb30SBarry Smith ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \ 215580bdb30SBarry Smith ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \ 216a30f8f8cSSatish Balay rp[_i] = bcol; \ 217a30f8f8cSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 218e56f5c9eSBarry Smith B->nonzerostate++;\ 219a30f8f8cSSatish Balay b_noinsert:; \ 220a30f8f8cSSatish Balay bilen[brow] = nrow; \ 221a30f8f8cSSatish Balay } 222a30f8f8cSSatish Balay 223a30f8f8cSSatish Balay /* Only add/insert a(i,j) with i<=j (blocks). 224476417e5SBarry Smith Any a(i,j) with i>j input by user is ingored or generates an error 225a30f8f8cSSatish Balay */ 226dd6ea824SBarry Smith PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 227a30f8f8cSSatish Balay { 228a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 229a30f8f8cSSatish Balay MatScalar value; 230ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented; 231dfbe8321SBarry Smith PetscErrorCode ierr; 2321302d50aSBarry Smith PetscInt i,j,row,col; 233d0f46423SBarry Smith PetscInt rstart_orig=mat->rmap->rstart; 234d0f46423SBarry Smith PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 235d0f46423SBarry Smith PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 236a30f8f8cSSatish Balay 237a30f8f8cSSatish Balay /* Some Variables required in the macro */ 238a30f8f8cSSatish Balay Mat A = baij->A; 239a30f8f8cSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 2401302d50aSBarry Smith PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 241a30f8f8cSSatish Balay MatScalar *aa =a->a; 242a30f8f8cSSatish Balay 243a30f8f8cSSatish Balay Mat B = baij->B; 244a30f8f8cSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 2451302d50aSBarry Smith PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 246a30f8f8cSSatish Balay MatScalar *ba =b->a; 247a30f8f8cSSatish Balay 2481302d50aSBarry Smith PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 2491302d50aSBarry Smith PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 250a30f8f8cSSatish Balay MatScalar *ap,*bap; 251a30f8f8cSSatish Balay 252a30f8f8cSSatish Balay /* for stash */ 2530298fd71SBarry Smith PetscInt n_loc, *in_loc = NULL; 2540298fd71SBarry Smith MatScalar *v_loc = NULL; 255a30f8f8cSSatish Balay 256a30f8f8cSSatish Balay PetscFunctionBegin; 257a30f8f8cSSatish Balay if (!baij->donotstash) { 25859ffdab8SBarry Smith if (n > baij->n_loc) { 25959ffdab8SBarry Smith ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 26059ffdab8SBarry Smith ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 261785e854fSJed Brown ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr); 262785e854fSJed Brown ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr); 26326fbe8dcSKarl Rupp 26459ffdab8SBarry Smith baij->n_loc = n; 26559ffdab8SBarry Smith } 26659ffdab8SBarry Smith in_loc = baij->in_loc; 26759ffdab8SBarry Smith v_loc = baij->v_loc; 268a30f8f8cSSatish Balay } 269a30f8f8cSSatish Balay 270a30f8f8cSSatish Balay for (i=0; i<m; i++) { 271a30f8f8cSSatish Balay if (im[i] < 0) continue; 272cf9c20a2SJed Brown if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 273a30f8f8cSSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 274a30f8f8cSSatish Balay row = im[i] - rstart_orig; /* local row index */ 275a30f8f8cSSatish Balay for (j=0; j<n; j++) { 27601b2bd88SHong Zhang if (im[i]/bs > in[j]/bs) { 27701b2bd88SHong Zhang if (a->ignore_ltriangular) { 27801b2bd88SHong Zhang continue; /* ignore lower triangular blocks */ 27926fbe8dcSKarl Rupp } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 28001b2bd88SHong Zhang } 281a30f8f8cSSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 282a30f8f8cSSatish Balay col = in[j] - cstart_orig; /* local col index */ 283a30f8f8cSSatish Balay brow = row/bs; bcol = col/bs; 284a30f8f8cSSatish Balay if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 285db4deed7SKarl Rupp if (roworiented) value = v[i*n+j]; 286db4deed7SKarl Rupp else value = v[i+j*m]; 287d40312a9SBarry Smith MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 288a30f8f8cSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 289a30f8f8cSSatish Balay } else if (in[j] < 0) continue; 290cf9c20a2SJed Brown else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 291a30f8f8cSSatish Balay else { /* off-diag entry (B) */ 292a30f8f8cSSatish Balay if (mat->was_assembled) { 293a30f8f8cSSatish Balay if (!baij->colmap) { 294ab9863d7SBarry Smith ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 295a30f8f8cSSatish Balay } 296a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE) 297a30f8f8cSSatish Balay ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 29871730473SSatish Balay col = col - 1; 299a30f8f8cSSatish Balay #else 30071730473SSatish Balay col = baij->colmap[in[j]/bs] - 1; 301a30f8f8cSSatish Balay #endif 302a30f8f8cSSatish Balay if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 303ab9863d7SBarry Smith ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 304a30f8f8cSSatish Balay col = in[j]; 305a30f8f8cSSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 306a30f8f8cSSatish Balay B = baij->B; 307a30f8f8cSSatish Balay b = (Mat_SeqBAIJ*)(B)->data; 308a30f8f8cSSatish Balay bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 309a30f8f8cSSatish Balay ba = b->a; 31071730473SSatish Balay } else col += in[j]%bs; 311a30f8f8cSSatish Balay } else col = in[j]; 312db4deed7SKarl Rupp if (roworiented) value = v[i*n+j]; 313db4deed7SKarl Rupp else value = v[i+j*m]; 314d40312a9SBarry Smith MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 315a30f8f8cSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 316a30f8f8cSSatish Balay } 317a30f8f8cSSatish Balay } 318a30f8f8cSSatish Balay } else { /* off processor entry */ 3194cb17eb5SBarry Smith if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 320a30f8f8cSSatish Balay if (!baij->donotstash) { 3215080c13bSMatthew G Knepley mat->assembled = PETSC_FALSE; 322a30f8f8cSSatish Balay n_loc = 0; 323a30f8f8cSSatish Balay for (j=0; j<n; j++) { 324f65c83cfSHong Zhang if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 325a30f8f8cSSatish Balay in_loc[n_loc] = in[j]; 326a30f8f8cSSatish Balay if (roworiented) { 327a30f8f8cSSatish Balay v_loc[n_loc] = v[i*n+j]; 328a30f8f8cSSatish Balay } else { 329a30f8f8cSSatish Balay v_loc[n_loc] = v[j*m+i]; 330a30f8f8cSSatish Balay } 331a30f8f8cSSatish Balay n_loc++; 332a30f8f8cSSatish Balay } 333b400d20cSBarry Smith ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 334a30f8f8cSSatish Balay } 335a30f8f8cSSatish Balay } 336a30f8f8cSSatish Balay } 337a30f8f8cSSatish Balay PetscFunctionReturn(0); 338a30f8f8cSSatish Balay } 339a30f8f8cSSatish Balay 34036bd2089SBarry Smith PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 34136bd2089SBarry Smith { 34236bd2089SBarry Smith Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 34336bd2089SBarry Smith PetscErrorCode ierr; 34436bd2089SBarry Smith PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 34536bd2089SBarry Smith PetscInt *imax =a->imax,*ai=a->i,*ailen=a->ilen; 34636bd2089SBarry Smith PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 34736bd2089SBarry Smith PetscBool roworiented=a->roworiented; 34836bd2089SBarry Smith const PetscScalar *value = v; 34936bd2089SBarry Smith MatScalar *ap,*aa = a->a,*bap; 35036bd2089SBarry Smith 35136bd2089SBarry Smith PetscFunctionBegin; 35236bd2089SBarry Smith if (col < row) { 35336bd2089SBarry Smith if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */ 35436bd2089SBarry Smith else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 35536bd2089SBarry Smith } 35636bd2089SBarry Smith rp = aj + ai[row]; 35736bd2089SBarry Smith ap = aa + bs2*ai[row]; 35836bd2089SBarry Smith rmax = imax[row]; 35936bd2089SBarry Smith nrow = ailen[row]; 36036bd2089SBarry Smith value = v; 36136bd2089SBarry Smith low = 0; 36236bd2089SBarry Smith high = nrow; 36336bd2089SBarry Smith 36436bd2089SBarry Smith while (high-low > 7) { 36536bd2089SBarry Smith t = (low+high)/2; 36636bd2089SBarry Smith if (rp[t] > col) high = t; 36736bd2089SBarry Smith else low = t; 36836bd2089SBarry Smith } 36936bd2089SBarry Smith for (i=low; i<high; i++) { 37036bd2089SBarry Smith if (rp[i] > col) break; 37136bd2089SBarry Smith if (rp[i] == col) { 37236bd2089SBarry Smith bap = ap + bs2*i; 37336bd2089SBarry Smith if (roworiented) { 37436bd2089SBarry Smith if (is == ADD_VALUES) { 37536bd2089SBarry Smith for (ii=0; ii<bs; ii++) { 37636bd2089SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 37736bd2089SBarry Smith bap[jj] += *value++; 37836bd2089SBarry Smith } 37936bd2089SBarry Smith } 38036bd2089SBarry Smith } else { 38136bd2089SBarry Smith for (ii=0; ii<bs; ii++) { 38236bd2089SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 38336bd2089SBarry Smith bap[jj] = *value++; 38436bd2089SBarry Smith } 38536bd2089SBarry Smith } 38636bd2089SBarry Smith } 38736bd2089SBarry Smith } else { 38836bd2089SBarry Smith if (is == ADD_VALUES) { 38936bd2089SBarry Smith for (ii=0; ii<bs; ii++) { 39036bd2089SBarry Smith for (jj=0; jj<bs; jj++) { 39136bd2089SBarry Smith *bap++ += *value++; 39236bd2089SBarry Smith } 39336bd2089SBarry Smith } 39436bd2089SBarry Smith } else { 39536bd2089SBarry Smith for (ii=0; ii<bs; ii++) { 39636bd2089SBarry Smith for (jj=0; jj<bs; jj++) { 39736bd2089SBarry Smith *bap++ = *value++; 39836bd2089SBarry Smith } 39936bd2089SBarry Smith } 40036bd2089SBarry Smith } 40136bd2089SBarry Smith } 40236bd2089SBarry Smith goto noinsert2; 40336bd2089SBarry Smith } 40436bd2089SBarry Smith } 40536bd2089SBarry Smith if (nonew == 1) goto noinsert2; 40636bd2089SBarry Smith if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol); 40736bd2089SBarry Smith MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 40836bd2089SBarry Smith N = nrow++ - 1; high++; 40936bd2089SBarry Smith /* shift up all the later entries in this row */ 410580bdb30SBarry Smith ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 411580bdb30SBarry Smith ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 41236bd2089SBarry Smith rp[i] = col; 41336bd2089SBarry Smith bap = ap + bs2*i; 41436bd2089SBarry Smith if (roworiented) { 41536bd2089SBarry Smith for (ii=0; ii<bs; ii++) { 41636bd2089SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 41736bd2089SBarry Smith bap[jj] = *value++; 41836bd2089SBarry Smith } 41936bd2089SBarry Smith } 42036bd2089SBarry Smith } else { 42136bd2089SBarry Smith for (ii=0; ii<bs; ii++) { 42236bd2089SBarry Smith for (jj=0; jj<bs; jj++) { 42336bd2089SBarry Smith *bap++ = *value++; 42436bd2089SBarry Smith } 42536bd2089SBarry Smith } 42636bd2089SBarry Smith } 42736bd2089SBarry Smith noinsert2:; 42836bd2089SBarry Smith ailen[row] = nrow; 42936bd2089SBarry Smith PetscFunctionReturn(0); 43036bd2089SBarry Smith } 43136bd2089SBarry Smith 43236bd2089SBarry Smith /* 43336bd2089SBarry Smith This routine is exactly duplicated in mpibaij.c 43436bd2089SBarry Smith */ 43536bd2089SBarry Smith PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 43636bd2089SBarry Smith { 43736bd2089SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 43836bd2089SBarry Smith PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 43936bd2089SBarry Smith PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 44036bd2089SBarry Smith PetscErrorCode ierr; 44136bd2089SBarry Smith PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 44236bd2089SBarry Smith PetscBool roworiented=a->roworiented; 44336bd2089SBarry Smith const PetscScalar *value = v; 44436bd2089SBarry Smith MatScalar *ap,*aa = a->a,*bap; 44536bd2089SBarry Smith 44636bd2089SBarry Smith PetscFunctionBegin; 44736bd2089SBarry Smith rp = aj + ai[row]; 44836bd2089SBarry Smith ap = aa + bs2*ai[row]; 44936bd2089SBarry Smith rmax = imax[row]; 45036bd2089SBarry Smith nrow = ailen[row]; 45136bd2089SBarry Smith low = 0; 45236bd2089SBarry Smith high = nrow; 45336bd2089SBarry Smith value = v; 45436bd2089SBarry Smith while (high-low > 7) { 45536bd2089SBarry Smith t = (low+high)/2; 45636bd2089SBarry Smith if (rp[t] > col) high = t; 45736bd2089SBarry Smith else low = t; 45836bd2089SBarry Smith } 45936bd2089SBarry Smith for (i=low; i<high; i++) { 46036bd2089SBarry Smith if (rp[i] > col) break; 46136bd2089SBarry Smith if (rp[i] == col) { 46236bd2089SBarry Smith bap = ap + bs2*i; 46336bd2089SBarry Smith if (roworiented) { 46436bd2089SBarry Smith if (is == ADD_VALUES) { 46536bd2089SBarry Smith for (ii=0; ii<bs; ii++) { 46636bd2089SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 46736bd2089SBarry Smith bap[jj] += *value++; 46836bd2089SBarry Smith } 46936bd2089SBarry Smith } 47036bd2089SBarry Smith } else { 47136bd2089SBarry Smith for (ii=0; ii<bs; ii++) { 47236bd2089SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 47336bd2089SBarry Smith bap[jj] = *value++; 47436bd2089SBarry Smith } 47536bd2089SBarry Smith } 47636bd2089SBarry Smith } 47736bd2089SBarry Smith } else { 47836bd2089SBarry Smith if (is == ADD_VALUES) { 47936bd2089SBarry Smith for (ii=0; ii<bs; ii++,value+=bs) { 48036bd2089SBarry Smith for (jj=0; jj<bs; jj++) { 48136bd2089SBarry Smith bap[jj] += value[jj]; 48236bd2089SBarry Smith } 48336bd2089SBarry Smith bap += bs; 48436bd2089SBarry Smith } 48536bd2089SBarry Smith } else { 48636bd2089SBarry Smith for (ii=0; ii<bs; ii++,value+=bs) { 48736bd2089SBarry Smith for (jj=0; jj<bs; jj++) { 48836bd2089SBarry Smith bap[jj] = value[jj]; 48936bd2089SBarry Smith } 49036bd2089SBarry Smith bap += bs; 49136bd2089SBarry Smith } 49236bd2089SBarry Smith } 49336bd2089SBarry Smith } 49436bd2089SBarry Smith goto noinsert2; 49536bd2089SBarry Smith } 49636bd2089SBarry Smith } 49736bd2089SBarry Smith if (nonew == 1) goto noinsert2; 49836bd2089SBarry Smith if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol); 49936bd2089SBarry Smith MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 50036bd2089SBarry Smith N = nrow++ - 1; high++; 50136bd2089SBarry Smith /* shift up all the later entries in this row */ 502580bdb30SBarry Smith ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 503580bdb30SBarry Smith ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 50436bd2089SBarry Smith rp[i] = col; 50536bd2089SBarry Smith bap = ap + bs2*i; 50636bd2089SBarry Smith if (roworiented) { 50736bd2089SBarry Smith for (ii=0; ii<bs; ii++) { 50836bd2089SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 50936bd2089SBarry Smith bap[jj] = *value++; 51036bd2089SBarry Smith } 51136bd2089SBarry Smith } 51236bd2089SBarry Smith } else { 51336bd2089SBarry Smith for (ii=0; ii<bs; ii++) { 51436bd2089SBarry Smith for (jj=0; jj<bs; jj++) { 51536bd2089SBarry Smith *bap++ = *value++; 51636bd2089SBarry Smith } 51736bd2089SBarry Smith } 51836bd2089SBarry Smith } 51936bd2089SBarry Smith noinsert2:; 52036bd2089SBarry Smith ailen[row] = nrow; 52136bd2089SBarry Smith PetscFunctionReturn(0); 52236bd2089SBarry Smith } 52336bd2089SBarry Smith 52436bd2089SBarry Smith /* 52536bd2089SBarry Smith This routine could be optimized by removing the need for the block copy below and passing stride information 52636bd2089SBarry Smith to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 52736bd2089SBarry Smith */ 528dd6ea824SBarry Smith PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 529a30f8f8cSSatish Balay { 5300880e062SHong Zhang Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 531f15d580aSBarry Smith const MatScalar *value; 532f15d580aSBarry Smith MatScalar *barray =baij->barray; 533ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 534dfbe8321SBarry Smith PetscErrorCode ierr; 535899cda47SBarry Smith PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 536476417e5SBarry Smith PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 537476417e5SBarry Smith PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 5380880e062SHong Zhang 539a30f8f8cSSatish Balay PetscFunctionBegin; 5400880e062SHong Zhang if (!barray) { 541785e854fSJed Brown ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 5420880e062SHong Zhang baij->barray = barray; 5430880e062SHong Zhang } 5440880e062SHong Zhang 5450880e062SHong Zhang if (roworiented) { 5460880e062SHong Zhang stepval = (n-1)*bs; 5470880e062SHong Zhang } else { 5480880e062SHong Zhang stepval = (m-1)*bs; 5490880e062SHong Zhang } 5500880e062SHong Zhang for (i=0; i<m; i++) { 5510880e062SHong Zhang if (im[i] < 0) continue; 552cf9c20a2SJed Brown if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1); 5530880e062SHong Zhang if (im[i] >= rstart && im[i] < rend) { 5540880e062SHong Zhang row = im[i] - rstart; 5550880e062SHong Zhang for (j=0; j<n; j++) { 556f3f98c53SJed Brown if (im[i] > in[j]) { 557f3f98c53SJed Brown if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 558e32f2f54SBarry Smith else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 559f3f98c53SJed Brown } 5600880e062SHong Zhang /* If NumCol = 1 then a copy is not required */ 5610880e062SHong Zhang if ((roworiented) && (n == 1)) { 562f15d580aSBarry Smith barray = (MatScalar*) v + i*bs2; 5630880e062SHong Zhang } else if ((!roworiented) && (m == 1)) { 564f15d580aSBarry Smith barray = (MatScalar*) v + j*bs2; 5650880e062SHong Zhang } else { /* Here a copy is required */ 5660880e062SHong Zhang if (roworiented) { 5670880e062SHong Zhang value = v + i*(stepval+bs)*bs + j*bs; 5680880e062SHong Zhang } else { 5690880e062SHong Zhang value = v + j*(stepval+bs)*bs + i*bs; 5700880e062SHong Zhang } 5710880e062SHong Zhang for (ii=0; ii<bs; ii++,value+=stepval) { 5720880e062SHong Zhang for (jj=0; jj<bs; jj++) { 5730880e062SHong Zhang *barray++ = *value++; 5740880e062SHong Zhang } 5750880e062SHong Zhang } 5760880e062SHong Zhang barray -=bs2; 5770880e062SHong Zhang } 5780880e062SHong Zhang 5790880e062SHong Zhang if (in[j] >= cstart && in[j] < cend) { 5800880e062SHong Zhang col = in[j] - cstart; 58136bd2089SBarry Smith ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 58226fbe8dcSKarl Rupp } else if (in[j] < 0) continue; 583cf9c20a2SJed Brown else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1); 5840880e062SHong Zhang else { 5850880e062SHong Zhang if (mat->was_assembled) { 5860880e062SHong Zhang if (!baij->colmap) { 587ab9863d7SBarry Smith ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 5880880e062SHong Zhang } 5890880e062SHong Zhang 5902515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 5910880e062SHong Zhang #if defined(PETSC_USE_CTABLE) 5921302d50aSBarry Smith { PetscInt data; 5930880e062SHong Zhang ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 594e32f2f54SBarry Smith if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 5950880e062SHong Zhang } 5960880e062SHong Zhang #else 597e32f2f54SBarry Smith if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 5980880e062SHong Zhang #endif 5990880e062SHong Zhang #endif 6000880e062SHong Zhang #if defined(PETSC_USE_CTABLE) 6010880e062SHong Zhang ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6020880e062SHong Zhang col = (col - 1)/bs; 6030880e062SHong Zhang #else 6040880e062SHong Zhang col = (baij->colmap[in[j]] - 1)/bs; 6050880e062SHong Zhang #endif 6060880e062SHong Zhang if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 607ab9863d7SBarry Smith ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 6080880e062SHong Zhang col = in[j]; 6090880e062SHong Zhang } 61026fbe8dcSKarl Rupp } else col = in[j]; 61136bd2089SBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 6120880e062SHong Zhang } 6130880e062SHong Zhang } 6140880e062SHong Zhang } else { 615bb003d0fSBarry Smith if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 6160880e062SHong Zhang if (!baij->donotstash) { 6170880e062SHong Zhang if (roworiented) { 6180880e062SHong Zhang ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 6190880e062SHong Zhang } else { 6200880e062SHong Zhang ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 6210880e062SHong Zhang } 6220880e062SHong Zhang } 6230880e062SHong Zhang } 6240880e062SHong Zhang } 6250880e062SHong Zhang PetscFunctionReturn(0); 626a30f8f8cSSatish Balay } 627a30f8f8cSSatish Balay 6281302d50aSBarry Smith PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 629a30f8f8cSSatish Balay { 630f3566a2aSHong Zhang Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 6316849ba73SBarry Smith PetscErrorCode ierr; 632d0f46423SBarry Smith PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 633d0f46423SBarry Smith PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 634a30f8f8cSSatish Balay 635a30f8f8cSSatish Balay PetscFunctionBegin; 636a30f8f8cSSatish Balay for (i=0; i<m; i++) { 637e32f2f54SBarry Smith if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 638e32f2f54SBarry Smith if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 639a30f8f8cSSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 640a30f8f8cSSatish Balay row = idxm[i] - bsrstart; 641a30f8f8cSSatish Balay for (j=0; j<n; j++) { 642e32f2f54SBarry Smith if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 643e32f2f54SBarry Smith if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 644a30f8f8cSSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend) { 645a30f8f8cSSatish Balay col = idxn[j] - bscstart; 646c8407628SSatish Balay ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 647a30f8f8cSSatish Balay } else { 648a30f8f8cSSatish Balay if (!baij->colmap) { 649ab9863d7SBarry Smith ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 650a30f8f8cSSatish Balay } 651a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE) 652a30f8f8cSSatish Balay ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 653a30f8f8cSSatish Balay data--; 654a30f8f8cSSatish Balay #else 655a30f8f8cSSatish Balay data = baij->colmap[idxn[j]/bs]-1; 656a30f8f8cSSatish Balay #endif 657a30f8f8cSSatish Balay if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 658a30f8f8cSSatish Balay else { 659a30f8f8cSSatish Balay col = data + idxn[j]%bs; 660e249d750SSatish Balay ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 661a30f8f8cSSatish Balay } 662a30f8f8cSSatish Balay } 663a30f8f8cSSatish Balay } 664f23aa3ddSBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 665a30f8f8cSSatish Balay } 666a30f8f8cSSatish Balay PetscFunctionReturn(0); 667a30f8f8cSSatish Balay } 668a30f8f8cSSatish Balay 669dfbe8321SBarry Smith PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 670a30f8f8cSSatish Balay { 671a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 672dfbe8321SBarry Smith PetscErrorCode ierr; 673a30f8f8cSSatish Balay PetscReal sum[2],*lnorm2; 674a30f8f8cSSatish Balay 675a30f8f8cSSatish Balay PetscFunctionBegin; 676a30f8f8cSSatish Balay if (baij->size == 1) { 677a30f8f8cSSatish Balay ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 678a30f8f8cSSatish Balay } else { 679a30f8f8cSSatish Balay if (type == NORM_FROBENIUS) { 680785e854fSJed Brown ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr); 681a30f8f8cSSatish Balay ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 682a30f8f8cSSatish Balay *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 683a30f8f8cSSatish Balay ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 684a30f8f8cSSatish Balay *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 685b2566f29SBarry Smith ierr = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6868f1a2a5eSBarry Smith *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 687a30f8f8cSSatish Balay ierr = PetscFree(lnorm2);CHKERRQ(ierr); 6880b8dc8d2SHong Zhang } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 6890b8dc8d2SHong Zhang Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 6900b8dc8d2SHong Zhang Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 6910b8dc8d2SHong Zhang PetscReal *rsum,*rsum2,vabs; 692899cda47SBarry Smith PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 693d0f46423SBarry Smith PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 6940b8dc8d2SHong Zhang MatScalar *v; 6950b8dc8d2SHong Zhang 696dcca6d9dSJed Brown ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr); 697580bdb30SBarry Smith ierr = PetscArrayzero(rsum,mat->cmap->N);CHKERRQ(ierr); 6980b8dc8d2SHong Zhang /* Amat */ 6990b8dc8d2SHong Zhang v = amat->a; jj = amat->j; 7000b8dc8d2SHong Zhang for (brow=0; brow<mbs; brow++) { 7010b8dc8d2SHong Zhang grow = bs*(rstart + brow); 7020b8dc8d2SHong Zhang nz = amat->i[brow+1] - amat->i[brow]; 7030b8dc8d2SHong Zhang for (bcol=0; bcol<nz; bcol++) { 7040b8dc8d2SHong Zhang gcol = bs*(rstart + *jj); jj++; 7050b8dc8d2SHong Zhang for (col=0; col<bs; col++) { 7060b8dc8d2SHong Zhang for (row=0; row<bs; row++) { 7070b8dc8d2SHong Zhang vabs = PetscAbsScalar(*v); v++; 7080b8dc8d2SHong Zhang rsum[gcol+col] += vabs; 7090b8dc8d2SHong Zhang /* non-diagonal block */ 7100b8dc8d2SHong Zhang if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 7110b8dc8d2SHong Zhang } 7120b8dc8d2SHong Zhang } 7130b8dc8d2SHong Zhang } 71451f70360SJed Brown ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 7150b8dc8d2SHong Zhang } 7160b8dc8d2SHong Zhang /* Bmat */ 7170b8dc8d2SHong Zhang v = bmat->a; jj = bmat->j; 7180b8dc8d2SHong Zhang for (brow=0; brow<mbs; brow++) { 7190b8dc8d2SHong Zhang grow = bs*(rstart + brow); 7200b8dc8d2SHong Zhang nz = bmat->i[brow+1] - bmat->i[brow]; 7210b8dc8d2SHong Zhang for (bcol=0; bcol<nz; bcol++) { 7220b8dc8d2SHong Zhang gcol = bs*garray[*jj]; jj++; 7230b8dc8d2SHong Zhang for (col=0; col<bs; col++) { 7240b8dc8d2SHong Zhang for (row=0; row<bs; row++) { 7250b8dc8d2SHong Zhang vabs = PetscAbsScalar(*v); v++; 7260b8dc8d2SHong Zhang rsum[gcol+col] += vabs; 7270b8dc8d2SHong Zhang rsum[grow+row] += vabs; 7280b8dc8d2SHong Zhang } 7290b8dc8d2SHong Zhang } 7300b8dc8d2SHong Zhang } 73151f70360SJed Brown ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr); 7320b8dc8d2SHong Zhang } 733b2566f29SBarry Smith ierr = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 7340b8dc8d2SHong Zhang *norm = 0.0; 735d0f46423SBarry Smith for (col=0; col<mat->cmap->N; col++) { 7360b8dc8d2SHong Zhang if (rsum2[col] > *norm) *norm = rsum2[col]; 7370b8dc8d2SHong Zhang } 73874ed9c26SBarry Smith ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 739f23aa3ddSBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 740a30f8f8cSSatish Balay } 741a30f8f8cSSatish Balay PetscFunctionReturn(0); 742a30f8f8cSSatish Balay } 743a30f8f8cSSatish Balay 744dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 745a30f8f8cSSatish Balay { 746a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 747dfbe8321SBarry Smith PetscErrorCode ierr; 7481302d50aSBarry Smith PetscInt nstash,reallocs; 749a30f8f8cSSatish Balay 750a30f8f8cSSatish Balay PetscFunctionBegin; 75126fbe8dcSKarl Rupp if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 752a30f8f8cSSatish Balay 753d0f46423SBarry Smith ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 7541e2582c4SBarry Smith ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 755a30f8f8cSSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 7561e2582c4SBarry Smith ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 757a30f8f8cSSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 7581e2582c4SBarry Smith ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 759a30f8f8cSSatish Balay PetscFunctionReturn(0); 760a30f8f8cSSatish Balay } 761a30f8f8cSSatish Balay 762dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 763a30f8f8cSSatish Balay { 764a30f8f8cSSatish Balay Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 765a30f8f8cSSatish Balay Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data; 7666849ba73SBarry Smith PetscErrorCode ierr; 76713f74950SBarry Smith PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 768e44c0bd4SBarry Smith PetscInt *row,*col; 769ace3abfcSBarry Smith PetscBool other_disassembled; 77013f74950SBarry Smith PetscMPIInt n; 771ace3abfcSBarry Smith PetscBool r1,r2,r3; 772a30f8f8cSSatish Balay MatScalar *val; 773a30f8f8cSSatish Balay 77491c97fd4SSatish Balay /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 775a30f8f8cSSatish Balay PetscFunctionBegin; 7764cb17eb5SBarry Smith if (!baij->donotstash && !mat->nooffprocentries) { 777a30f8f8cSSatish Balay while (1) { 778a30f8f8cSSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 779a30f8f8cSSatish Balay if (!flg) break; 780a30f8f8cSSatish Balay 781a30f8f8cSSatish Balay for (i=0; i<n;) { 782a30f8f8cSSatish Balay /* Now identify the consecutive vals belonging to the same row */ 78326fbe8dcSKarl Rupp for (j=i,rstart=row[j]; j<n; j++) { 78426fbe8dcSKarl Rupp if (row[j] != rstart) break; 78526fbe8dcSKarl Rupp } 786a30f8f8cSSatish Balay if (j < n) ncols = j-i; 787a30f8f8cSSatish Balay else ncols = n-i; 788a30f8f8cSSatish Balay /* Now assemble all these values with a single function call */ 7894b4eb8d3SJed Brown ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 790a30f8f8cSSatish Balay i = j; 791a30f8f8cSSatish Balay } 792a30f8f8cSSatish Balay } 793a30f8f8cSSatish Balay ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 794a30f8f8cSSatish Balay /* Now process the block-stash. Since the values are stashed column-oriented, 795a30f8f8cSSatish Balay set the roworiented flag to column oriented, and after MatSetValues() 796a30f8f8cSSatish Balay restore the original flags */ 797a30f8f8cSSatish Balay r1 = baij->roworiented; 798a30f8f8cSSatish Balay r2 = a->roworiented; 79991c97fd4SSatish Balay r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 80026fbe8dcSKarl Rupp 801a30f8f8cSSatish Balay baij->roworiented = PETSC_FALSE; 802a30f8f8cSSatish Balay a->roworiented = PETSC_FALSE; 80326fbe8dcSKarl Rupp 80491c97fd4SSatish Balay ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 805a30f8f8cSSatish Balay while (1) { 806a30f8f8cSSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 807a30f8f8cSSatish Balay if (!flg) break; 808a30f8f8cSSatish Balay 809a30f8f8cSSatish Balay for (i=0; i<n;) { 810a30f8f8cSSatish Balay /* Now identify the consecutive vals belonging to the same row */ 81126fbe8dcSKarl Rupp for (j=i,rstart=row[j]; j<n; j++) { 81226fbe8dcSKarl Rupp if (row[j] != rstart) break; 81326fbe8dcSKarl Rupp } 814a30f8f8cSSatish Balay if (j < n) ncols = j-i; 815a30f8f8cSSatish Balay else ncols = n-i; 8164b4eb8d3SJed Brown ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr); 817a30f8f8cSSatish Balay i = j; 818a30f8f8cSSatish Balay } 819a30f8f8cSSatish Balay } 820a30f8f8cSSatish Balay ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 82126fbe8dcSKarl Rupp 822a30f8f8cSSatish Balay baij->roworiented = r1; 823a30f8f8cSSatish Balay a->roworiented = r2; 82426fbe8dcSKarl Rupp 82591c97fd4SSatish Balay ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 826a30f8f8cSSatish Balay } 827a30f8f8cSSatish Balay 828a30f8f8cSSatish Balay ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 829a30f8f8cSSatish Balay ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 830a30f8f8cSSatish Balay 831a30f8f8cSSatish Balay /* determine if any processor has disassembled, if so we must 832a30f8f8cSSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 833a30f8f8cSSatish Balay /* 834a30f8f8cSSatish Balay if nonzero structure of submatrix B cannot change then we know that 835a30f8f8cSSatish Balay no processor disassembled thus we can skip this stuff 836a30f8f8cSSatish Balay */ 837a30f8f8cSSatish Balay if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 838b2566f29SBarry Smith ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 839a30f8f8cSSatish Balay if (mat->was_assembled && !other_disassembled) { 840ab9863d7SBarry Smith ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 841a30f8f8cSSatish Balay } 842a30f8f8cSSatish Balay } 843a30f8f8cSSatish Balay 844a30f8f8cSSatish Balay if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 84540781036SHong Zhang ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 846a30f8f8cSSatish Balay } 847a30f8f8cSSatish Balay ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 848a30f8f8cSSatish Balay ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 849a30f8f8cSSatish Balay 85074ed9c26SBarry Smith ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 85126fbe8dcSKarl Rupp 852a30f8f8cSSatish Balay baij->rowvalues = 0; 8534f9cfa9eSBarry Smith 8544f9cfa9eSBarry Smith /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 8554f9cfa9eSBarry Smith if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 856e56f5c9eSBarry Smith PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 857b2566f29SBarry Smith ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 858e56f5c9eSBarry Smith } 859a30f8f8cSSatish Balay PetscFunctionReturn(0); 860a30f8f8cSSatish Balay } 861a30f8f8cSSatish Balay 862dd6ea824SBarry Smith extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 8639804daf3SBarry Smith #include <petscdraw.h> 8646849ba73SBarry Smith static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 865a30f8f8cSSatish Balay { 866a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 867dfbe8321SBarry Smith PetscErrorCode ierr; 868d0f46423SBarry Smith PetscInt bs = mat->rmap->bs; 8697da1fb6eSBarry Smith PetscMPIInt rank = baij->rank; 870ace3abfcSBarry Smith PetscBool iascii,isdraw; 871b0a32e0cSBarry Smith PetscViewer sviewer; 872f3ef73ceSBarry Smith PetscViewerFormat format; 873a30f8f8cSSatish Balay 874a30f8f8cSSatish Balay PetscFunctionBegin; 875251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 876251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 87732077d6dSBarry Smith if (iascii) { 878b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 879456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 880a30f8f8cSSatish Balay MatInfo info; 881ce94432eSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 882a30f8f8cSSatish Balay ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 8831575c14dSBarry Smith ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 884b1e9c6f1SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr); 885a30f8f8cSSatish Balay ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 886e6dd01d4SJed Brown ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 887a30f8f8cSSatish Balay ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 888e6dd01d4SJed Brown ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 889b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 8901575c14dSBarry Smith ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 89107d81ca4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 892a30f8f8cSSatish Balay ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 893a30f8f8cSSatish Balay PetscFunctionReturn(0); 894fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_INFO) { 89577431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 896a30f8f8cSSatish Balay PetscFunctionReturn(0); 897c1490034SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 898c1490034SHong Zhang PetscFunctionReturn(0); 899a30f8f8cSSatish Balay } 900a30f8f8cSSatish Balay } 901a30f8f8cSSatish Balay 902a30f8f8cSSatish Balay if (isdraw) { 903b0a32e0cSBarry Smith PetscDraw draw; 904ace3abfcSBarry Smith PetscBool isnull; 905b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 90645f3bb6eSLisandro Dalcin ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 90745f3bb6eSLisandro Dalcin if (isnull) PetscFunctionReturn(0); 908a30f8f8cSSatish Balay } 909a30f8f8cSSatish Balay 9107da1fb6eSBarry Smith { 911a30f8f8cSSatish Balay /* assemble the entire matrix onto first processor. */ 912a30f8f8cSSatish Balay Mat A; 91365d70643SHong Zhang Mat_SeqSBAIJ *Aloc; 91465d70643SHong Zhang Mat_SeqBAIJ *Bloc; 915d0f46423SBarry Smith PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 916a30f8f8cSSatish Balay MatScalar *a; 9173e219373SBarry Smith const char *matname; 918a30f8f8cSSatish Balay 919f204ca49SKris Buschelman /* Should this be the same type as mat? */ 920ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 921a30f8f8cSSatish Balay if (!rank) { 922f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 923a30f8f8cSSatish Balay } else { 924f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 925a30f8f8cSSatish Balay } 926f204ca49SKris Buschelman ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 9270298fd71SBarry Smith ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 9282b82e772SSatish Balay ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 9293bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 930a30f8f8cSSatish Balay 931a30f8f8cSSatish Balay /* copy over the A part */ 93265d70643SHong Zhang Aloc = (Mat_SeqSBAIJ*)baij->A->data; 933a30f8f8cSSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 934785e854fSJed Brown ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 935a30f8f8cSSatish Balay 936a30f8f8cSSatish Balay for (i=0; i<mbs; i++) { 937e9f7bc9eSHong Zhang rvals[0] = bs*(baij->rstartbs + i); 93826fbe8dcSKarl Rupp for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 939a30f8f8cSSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 940e9f7bc9eSHong Zhang col = (baij->cstartbs+aj[j])*bs; 941a30f8f8cSSatish Balay for (k=0; k<bs; k++) { 942dd6ea824SBarry Smith ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 94326fbe8dcSKarl Rupp col++; 94426fbe8dcSKarl Rupp a += bs; 945a30f8f8cSSatish Balay } 946a30f8f8cSSatish Balay } 947a30f8f8cSSatish Balay } 948a30f8f8cSSatish Balay /* copy over the B part */ 94965d70643SHong Zhang Bloc = (Mat_SeqBAIJ*)baij->B->data; 95065d70643SHong Zhang ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 951a30f8f8cSSatish Balay for (i=0; i<mbs; i++) { 952e9f7bc9eSHong Zhang 953e9f7bc9eSHong Zhang rvals[0] = bs*(baij->rstartbs + i); 95426fbe8dcSKarl Rupp for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 955a30f8f8cSSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 956a30f8f8cSSatish Balay col = baij->garray[aj[j]]*bs; 957a30f8f8cSSatish Balay for (k=0; k<bs; k++) { 958799bb49cSHong Zhang ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 95926fbe8dcSKarl Rupp col++; 96026fbe8dcSKarl Rupp a += bs; 961a30f8f8cSSatish Balay } 962a30f8f8cSSatish Balay } 963a30f8f8cSSatish Balay } 964a30f8f8cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 965a30f8f8cSSatish Balay ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 966a30f8f8cSSatish Balay ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 967a30f8f8cSSatish Balay /* 968a30f8f8cSSatish Balay Everyone has to call to draw the matrix since the graphics waits are 969b0a32e0cSBarry Smith synchronized across all processors that share the PetscDraw object 970a30f8f8cSSatish Balay */ 9713f08860eSBarry Smith ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 972ade3a672SBarry Smith ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 9733e219373SBarry Smith if (!rank) { 974ade3a672SBarry Smith ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 975383922c3SLisandro Dalcin ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 976a30f8f8cSSatish Balay } 9773f08860eSBarry Smith ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 9781575c14dSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 9796bf464f9SBarry Smith ierr = MatDestroy(&A);CHKERRQ(ierr); 980a30f8f8cSSatish Balay } 981a30f8f8cSSatish Balay PetscFunctionReturn(0); 982a30f8f8cSSatish Balay } 983a30f8f8cSSatish Balay 984618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */ 985618cc2edSLisandro Dalcin #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary 986d1654148SHong Zhang 987dfbe8321SBarry Smith PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 988a30f8f8cSSatish Balay { 989dfbe8321SBarry Smith PetscErrorCode ierr; 990ace3abfcSBarry Smith PetscBool iascii,isdraw,issocket,isbinary; 991a30f8f8cSSatish Balay 992a30f8f8cSSatish Balay PetscFunctionBegin; 993251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 994251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 995251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 996251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 997d1654148SHong Zhang if (iascii || isdraw || issocket) { 998a30f8f8cSSatish Balay ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 999d1654148SHong Zhang } else if (isbinary) { 1000d1654148SHong Zhang ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1001a30f8f8cSSatish Balay } 1002a30f8f8cSSatish Balay PetscFunctionReturn(0); 1003a30f8f8cSSatish Balay } 1004a30f8f8cSSatish Balay 1005dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 1006a30f8f8cSSatish Balay { 1007a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1008dfbe8321SBarry Smith PetscErrorCode ierr; 1009a30f8f8cSSatish Balay 1010a30f8f8cSSatish Balay PetscFunctionBegin; 1011a30f8f8cSSatish Balay #if defined(PETSC_USE_LOG) 1012d0f46423SBarry Smith PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1013a30f8f8cSSatish Balay #endif 1014a30f8f8cSSatish Balay ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1015a30f8f8cSSatish Balay ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 10166bf464f9SBarry Smith ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 10176bf464f9SBarry Smith ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1018a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE) 10196bc0bbbfSBarry Smith ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1020a30f8f8cSSatish Balay #else 102105b42c5fSBarry Smith ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1022a30f8f8cSSatish Balay #endif 102305b42c5fSBarry Smith ierr = PetscFree(baij->garray);CHKERRQ(ierr); 10246bf464f9SBarry Smith ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 10256bf464f9SBarry Smith ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 10266bf464f9SBarry Smith ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 10276bf464f9SBarry Smith ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 10286bf464f9SBarry Smith ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 10296bf464f9SBarry Smith ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 10306bf464f9SBarry Smith ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 10316bf464f9SBarry Smith ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 10325755ff91SHong Zhang ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 103305b42c5fSBarry Smith ierr = PetscFree(baij->barray);CHKERRQ(ierr); 103405b42c5fSBarry Smith ierr = PetscFree(baij->hd);CHKERRQ(ierr); 10356bf464f9SBarry Smith ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 10366bf464f9SBarry Smith ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 10376bf464f9SBarry Smith ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 1038ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE) 103905b42c5fSBarry Smith ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 1040a30f8f8cSSatish Balay #endif 104159ffdab8SBarry Smith ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 104259ffdab8SBarry Smith ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 1043899cda47SBarry Smith ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1044bf0cc555SLisandro Dalcin ierr = PetscFree(mat->data);CHKERRQ(ierr); 1045901853e0SKris Buschelman 1046dbd8c25aSHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1047bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1048bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1049bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1050d2c30c80SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 10516214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 10526214f412SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr); 10536214f412SHong Zhang #endif 1054*d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK) 1055*d24d4204SJose E. Roman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_scalapack_C",NULL);CHKERRQ(ierr); 1056*d24d4204SJose E. Roman #endif 1057b147fbf3SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);CHKERRQ(ierr); 1058b147fbf3SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);CHKERRQ(ierr); 1059a30f8f8cSSatish Balay PetscFunctionReturn(0); 1060a30f8f8cSSatish Balay } 1061a30f8f8cSSatish Balay 1062547795f9SHong Zhang PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 1063547795f9SHong Zhang { 1064547795f9SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1065547795f9SHong Zhang PetscErrorCode ierr; 1066eb1ec7c1SStefano Zampini PetscInt mbs=a->mbs,bs=A->rmap->bs; 10676de40e93SBarry Smith PetscScalar *from; 10686de40e93SBarry Smith const PetscScalar *x; 1069547795f9SHong Zhang 1070547795f9SHong Zhang PetscFunctionBegin; 1071547795f9SHong Zhang /* diagonal part */ 1072547795f9SHong Zhang ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1073547795f9SHong Zhang ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1074547795f9SHong Zhang 1075547795f9SHong Zhang /* subdiagonal part */ 1076547795f9SHong Zhang ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1077547795f9SHong Zhang 1078547795f9SHong Zhang /* copy x into the vec slvec0 */ 1079547795f9SHong Zhang ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 10806de40e93SBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1081547795f9SHong Zhang 1082580bdb30SBarry Smith ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1083547795f9SHong Zhang ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 10846de40e93SBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1085547795f9SHong Zhang 1086547795f9SHong Zhang ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1087547795f9SHong Zhang ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1088547795f9SHong Zhang /* supperdiagonal part */ 1089547795f9SHong Zhang ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1090547795f9SHong Zhang PetscFunctionReturn(0); 1091547795f9SHong Zhang } 1092547795f9SHong Zhang 1093dfbe8321SBarry Smith PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 1094a9d4b620SHong Zhang { 1095a9d4b620SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1096dfbe8321SBarry Smith PetscErrorCode ierr; 1097eb1ec7c1SStefano Zampini PetscInt mbs=a->mbs,bs=A->rmap->bs; 1098d9ca1df4SBarry Smith PetscScalar *from; 1099d9ca1df4SBarry Smith const PetscScalar *x; 1100a9d4b620SHong Zhang 1101a9d4b620SHong Zhang PetscFunctionBegin; 1102a9d4b620SHong Zhang /* diagonal part */ 1103a9d4b620SHong Zhang ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 1104fa22f6d0SBarry Smith ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 1105a9d4b620SHong Zhang 1106a9d4b620SHong Zhang /* subdiagonal part */ 1107a9d4b620SHong Zhang ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1108fc165ae2SBarry Smith 1109a9d4b620SHong Zhang /* copy x into the vec slvec0 */ 11101ebc52fbSHong Zhang ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1111d9ca1df4SBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1112a9d4b620SHong Zhang 1113580bdb30SBarry Smith ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1114fc165ae2SBarry Smith ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1115d9ca1df4SBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1116fc165ae2SBarry Smith 1117fc165ae2SBarry Smith ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1118ca9f406cSSatish Balay ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1119a9d4b620SHong Zhang /* supperdiagonal part */ 1120a9d4b620SHong Zhang ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 1121a9d4b620SHong Zhang PetscFunctionReturn(0); 1122a9d4b620SHong Zhang } 1123a9d4b620SHong Zhang 1124dfbe8321SBarry Smith PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 1125a30f8f8cSSatish Balay { 1126a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1127dfbe8321SBarry Smith PetscErrorCode ierr; 11281302d50aSBarry Smith PetscInt nt; 1129a30f8f8cSSatish Balay 1130a30f8f8cSSatish Balay PetscFunctionBegin; 1131a30f8f8cSSatish Balay ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1132e7e72b3dSBarry Smith if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1133e7e72b3dSBarry Smith 1134a30f8f8cSSatish Balay ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1135e7e72b3dSBarry Smith if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 113665d70643SHong Zhang 1137ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1138b941877fSHong Zhang /* do diagonal part */ 1139b941877fSHong Zhang ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1140b941877fSHong Zhang /* do supperdiagonal part */ 1141ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1142b941877fSHong Zhang ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1143b941877fSHong Zhang /* do subdiagonal part */ 1144b941877fSHong Zhang ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1145ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1146ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1147a30f8f8cSSatish Balay PetscFunctionReturn(0); 1148a30f8f8cSSatish Balay } 1149a30f8f8cSSatish Balay 1150eb1ec7c1SStefano Zampini PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz) 1151eb1ec7c1SStefano Zampini { 1152eb1ec7c1SStefano Zampini Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1153eb1ec7c1SStefano Zampini PetscErrorCode ierr; 1154eb1ec7c1SStefano Zampini PetscInt mbs=a->mbs,bs=A->rmap->bs; 1155eb1ec7c1SStefano Zampini PetscScalar *from,zero=0.0; 1156eb1ec7c1SStefano Zampini const PetscScalar *x; 1157eb1ec7c1SStefano Zampini 1158eb1ec7c1SStefano Zampini PetscFunctionBegin; 1159eb1ec7c1SStefano Zampini /* diagonal part */ 1160eb1ec7c1SStefano Zampini ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 1161eb1ec7c1SStefano Zampini ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 1162eb1ec7c1SStefano Zampini 1163eb1ec7c1SStefano Zampini /* subdiagonal part */ 1164eb1ec7c1SStefano Zampini ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1165eb1ec7c1SStefano Zampini 1166eb1ec7c1SStefano Zampini /* copy x into the vec slvec0 */ 1167eb1ec7c1SStefano Zampini ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1168eb1ec7c1SStefano Zampini ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1169eb1ec7c1SStefano Zampini ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 1170eb1ec7c1SStefano Zampini ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1171eb1ec7c1SStefano Zampini 1172eb1ec7c1SStefano Zampini ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1173eb1ec7c1SStefano Zampini ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1174eb1ec7c1SStefano Zampini ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1175eb1ec7c1SStefano Zampini 1176eb1ec7c1SStefano Zampini /* supperdiagonal part */ 1177eb1ec7c1SStefano Zampini ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1178eb1ec7c1SStefano Zampini PetscFunctionReturn(0); 1179eb1ec7c1SStefano Zampini } 1180eb1ec7c1SStefano Zampini 1181dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1182a30f8f8cSSatish Balay { 1183de8b6608SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1184dfbe8321SBarry Smith PetscErrorCode ierr; 1185d0f46423SBarry Smith PetscInt mbs=a->mbs,bs=A->rmap->bs; 1186d9ca1df4SBarry Smith PetscScalar *from,zero=0.0; 1187d9ca1df4SBarry Smith const PetscScalar *x; 1188a9d4b620SHong Zhang 1189a9d4b620SHong Zhang PetscFunctionBegin; 1190a9d4b620SHong Zhang /* diagonal part */ 1191a9d4b620SHong Zhang ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 11922dcb1b2aSMatthew Knepley ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 1193a9d4b620SHong Zhang 1194a9d4b620SHong Zhang /* subdiagonal part */ 1195a9d4b620SHong Zhang ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 1196a9d4b620SHong Zhang 1197a9d4b620SHong Zhang /* copy x into the vec slvec0 */ 11981ebc52fbSHong Zhang ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 1199d9ca1df4SBarry Smith ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1200580bdb30SBarry Smith ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 12011ebc52fbSHong Zhang ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 1202a9d4b620SHong Zhang 1203ca9f406cSSatish Balay ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1204d9ca1df4SBarry Smith ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1205ca9f406cSSatish Balay ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1206a9d4b620SHong Zhang 1207a9d4b620SHong Zhang /* supperdiagonal part */ 1208a9d4b620SHong Zhang ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 1209a9d4b620SHong Zhang PetscFunctionReturn(0); 1210a9d4b620SHong Zhang } 1211a9d4b620SHong Zhang 1212dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 1213a9d4b620SHong Zhang { 1214a9d4b620SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1215dfbe8321SBarry Smith PetscErrorCode ierr; 1216a30f8f8cSSatish Balay 1217a30f8f8cSSatish Balay PetscFunctionBegin; 1218ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1219b941877fSHong Zhang /* do diagonal part */ 1220b941877fSHong Zhang ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1221b941877fSHong Zhang /* do supperdiagonal part */ 1222ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1223de8b6608SHong Zhang ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1224de8b6608SHong Zhang 1225b941877fSHong Zhang /* do subdiagonal part */ 1226a30f8f8cSSatish Balay ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1227ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1228ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1229a30f8f8cSSatish Balay PetscFunctionReturn(0); 1230a30f8f8cSSatish Balay } 1231a30f8f8cSSatish Balay 1232a30f8f8cSSatish Balay /* 1233a30f8f8cSSatish Balay This only works correctly for square matrices where the subblock A->A is the 1234a30f8f8cSSatish Balay diagonal block 1235a30f8f8cSSatish Balay */ 1236dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1237a30f8f8cSSatish Balay { 1238a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1239dfbe8321SBarry Smith PetscErrorCode ierr; 1240a30f8f8cSSatish Balay 1241a30f8f8cSSatish Balay PetscFunctionBegin; 1242e32f2f54SBarry Smith /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1243a30f8f8cSSatish Balay ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1244a30f8f8cSSatish Balay PetscFunctionReturn(0); 1245a30f8f8cSSatish Balay } 1246a30f8f8cSSatish Balay 1247f4df32b1SMatthew Knepley PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 1248a30f8f8cSSatish Balay { 1249a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1250dfbe8321SBarry Smith PetscErrorCode ierr; 1251a30f8f8cSSatish Balay 1252a30f8f8cSSatish Balay PetscFunctionBegin; 1253f4df32b1SMatthew Knepley ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1254f4df32b1SMatthew Knepley ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1255a30f8f8cSSatish Balay PetscFunctionReturn(0); 1256a30f8f8cSSatish Balay } 1257a30f8f8cSSatish Balay 12581302d50aSBarry Smith PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1259a30f8f8cSSatish Balay { 1260d0d4cfc2SHong Zhang Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1261d0d4cfc2SHong Zhang PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1262d0d4cfc2SHong Zhang PetscErrorCode ierr; 1263d0f46423SBarry Smith PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1264d0f46423SBarry Smith PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1265899cda47SBarry Smith PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1266d0d4cfc2SHong Zhang 1267a30f8f8cSSatish Balay PetscFunctionBegin; 1268e32f2f54SBarry Smith if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1269d0d4cfc2SHong Zhang mat->getrowactive = PETSC_TRUE; 1270d0d4cfc2SHong Zhang 1271d0d4cfc2SHong Zhang if (!mat->rowvalues && (idx || v)) { 1272d0d4cfc2SHong Zhang /* 1273d0d4cfc2SHong Zhang allocate enough space to hold information from the longest row. 1274d0d4cfc2SHong Zhang */ 1275d0d4cfc2SHong Zhang Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1276d0d4cfc2SHong Zhang Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1277d0d4cfc2SHong Zhang PetscInt max = 1,mbs = mat->mbs,tmp; 1278d0d4cfc2SHong Zhang for (i=0; i<mbs; i++) { 1279d0d4cfc2SHong Zhang tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 128026fbe8dcSKarl Rupp if (max < tmp) max = tmp; 1281d0d4cfc2SHong Zhang } 1282dcca6d9dSJed Brown ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1283d0d4cfc2SHong Zhang } 1284d0d4cfc2SHong Zhang 1285e7e72b3dSBarry Smith if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1286d0d4cfc2SHong Zhang lrow = row - brstart; /* local row index */ 1287d0d4cfc2SHong Zhang 1288d0d4cfc2SHong Zhang pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1289d0d4cfc2SHong Zhang if (!v) {pvA = 0; pvB = 0;} 1290d0d4cfc2SHong Zhang if (!idx) {pcA = 0; if (!v) pcB = 0;} 1291d0d4cfc2SHong Zhang ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1292d0d4cfc2SHong Zhang ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1293d0d4cfc2SHong Zhang nztot = nzA + nzB; 1294d0d4cfc2SHong Zhang 1295d0d4cfc2SHong Zhang cmap = mat->garray; 1296d0d4cfc2SHong Zhang if (v || idx) { 1297d0d4cfc2SHong Zhang if (nztot) { 1298d0d4cfc2SHong Zhang /* Sort by increasing column numbers, assuming A and B already sorted */ 1299d0d4cfc2SHong Zhang PetscInt imark = -1; 1300d0d4cfc2SHong Zhang if (v) { 1301d0d4cfc2SHong Zhang *v = v_p = mat->rowvalues; 1302d0d4cfc2SHong Zhang for (i=0; i<nzB; i++) { 1303d0d4cfc2SHong Zhang if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1304d0d4cfc2SHong Zhang else break; 1305d0d4cfc2SHong Zhang } 1306d0d4cfc2SHong Zhang imark = i; 1307d0d4cfc2SHong Zhang for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1308d0d4cfc2SHong Zhang for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1309d0d4cfc2SHong Zhang } 1310d0d4cfc2SHong Zhang if (idx) { 1311d0d4cfc2SHong Zhang *idx = idx_p = mat->rowindices; 1312d0d4cfc2SHong Zhang if (imark > -1) { 1313d0d4cfc2SHong Zhang for (i=0; i<imark; i++) { 1314d0d4cfc2SHong Zhang idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1315d0d4cfc2SHong Zhang } 1316d0d4cfc2SHong Zhang } else { 1317d0d4cfc2SHong Zhang for (i=0; i<nzB; i++) { 131826fbe8dcSKarl Rupp if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1319d0d4cfc2SHong Zhang else break; 1320d0d4cfc2SHong Zhang } 1321d0d4cfc2SHong Zhang imark = i; 1322d0d4cfc2SHong Zhang } 1323d0d4cfc2SHong Zhang for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1324d0d4cfc2SHong Zhang for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1325d0d4cfc2SHong Zhang } 1326d0d4cfc2SHong Zhang } else { 1327d0d4cfc2SHong Zhang if (idx) *idx = 0; 1328d0d4cfc2SHong Zhang if (v) *v = 0; 1329d0d4cfc2SHong Zhang } 1330d0d4cfc2SHong Zhang } 1331d0d4cfc2SHong Zhang *nz = nztot; 1332d0d4cfc2SHong Zhang ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1333d0d4cfc2SHong Zhang ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1334a30f8f8cSSatish Balay PetscFunctionReturn(0); 1335a30f8f8cSSatish Balay } 1336a30f8f8cSSatish Balay 13371302d50aSBarry Smith PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1338a30f8f8cSSatish Balay { 1339a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1340a30f8f8cSSatish Balay 1341a30f8f8cSSatish Balay PetscFunctionBegin; 1342e7e72b3dSBarry Smith if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1343a30f8f8cSSatish Balay baij->getrowactive = PETSC_FALSE; 1344a30f8f8cSSatish Balay PetscFunctionReturn(0); 1345a30f8f8cSSatish Balay } 1346a30f8f8cSSatish Balay 1347d0d4cfc2SHong Zhang PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1348d0d4cfc2SHong Zhang { 1349d0d4cfc2SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1350d0d4cfc2SHong Zhang Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1351d0d4cfc2SHong Zhang 1352d0d4cfc2SHong Zhang PetscFunctionBegin; 1353d0d4cfc2SHong Zhang aA->getrow_utriangular = PETSC_TRUE; 1354d0d4cfc2SHong Zhang PetscFunctionReturn(0); 1355d0d4cfc2SHong Zhang } 1356d0d4cfc2SHong Zhang PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1357d0d4cfc2SHong Zhang { 1358d0d4cfc2SHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1359d0d4cfc2SHong Zhang Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1360d0d4cfc2SHong Zhang 1361d0d4cfc2SHong Zhang PetscFunctionBegin; 1362d0d4cfc2SHong Zhang aA->getrow_utriangular = PETSC_FALSE; 1363d0d4cfc2SHong Zhang PetscFunctionReturn(0); 1364d0d4cfc2SHong Zhang } 1365d0d4cfc2SHong Zhang 136699cafbc1SBarry Smith PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 136799cafbc1SBarry Smith { 136899cafbc1SBarry Smith Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 136999cafbc1SBarry Smith PetscErrorCode ierr; 137099cafbc1SBarry Smith 137199cafbc1SBarry Smith PetscFunctionBegin; 137299cafbc1SBarry Smith ierr = MatRealPart(a->A);CHKERRQ(ierr); 137399cafbc1SBarry Smith ierr = MatRealPart(a->B);CHKERRQ(ierr); 137499cafbc1SBarry Smith PetscFunctionReturn(0); 137599cafbc1SBarry Smith } 137699cafbc1SBarry Smith 137799cafbc1SBarry Smith PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 137899cafbc1SBarry Smith { 137999cafbc1SBarry Smith Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 138099cafbc1SBarry Smith PetscErrorCode ierr; 138199cafbc1SBarry Smith 138299cafbc1SBarry Smith PetscFunctionBegin; 138399cafbc1SBarry Smith ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 138499cafbc1SBarry Smith ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 138599cafbc1SBarry Smith PetscFunctionReturn(0); 138699cafbc1SBarry Smith } 138799cafbc1SBarry Smith 13887dae84e0SHong Zhang /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ() 138936032a97SHong Zhang Input: isrow - distributed(parallel), 139036032a97SHong Zhang iscol_local - locally owned (seq) 139136032a97SHong Zhang */ 139236032a97SHong Zhang PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg) 13938f46ffcaSHong Zhang { 13948f46ffcaSHong Zhang PetscErrorCode ierr; 13958f46ffcaSHong Zhang PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch; 13968f46ffcaSHong Zhang const PetscInt *ptr1,*ptr2; 139736032a97SHong Zhang 139836032a97SHong Zhang PetscFunctionBegin; 13998f46ffcaSHong Zhang ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr); 14008f46ffcaSHong Zhang ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr); 14011098a8e8SHong Zhang if (sz1 > sz2) { 14021098a8e8SHong Zhang *flg = PETSC_FALSE; 14031098a8e8SHong Zhang PetscFunctionReturn(0); 14041098a8e8SHong Zhang } 14058f46ffcaSHong Zhang 14068f46ffcaSHong Zhang ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr); 14078f46ffcaSHong Zhang ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr); 14088f46ffcaSHong Zhang 14098f46ffcaSHong Zhang ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr); 14108f46ffcaSHong Zhang ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr); 1411580bdb30SBarry Smith ierr = PetscArraycpy(a1,ptr1,sz1);CHKERRQ(ierr); 1412580bdb30SBarry Smith ierr = PetscArraycpy(a2,ptr2,sz2);CHKERRQ(ierr); 14138f46ffcaSHong Zhang ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr); 14148f46ffcaSHong Zhang ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr); 14158f46ffcaSHong Zhang 14168f46ffcaSHong Zhang nmatch=0; 14178f46ffcaSHong Zhang k = 0; 14188f46ffcaSHong Zhang for (i=0; i<sz1; i++){ 14198f46ffcaSHong Zhang for (j=k; j<sz2; j++){ 14208f46ffcaSHong Zhang if (a1[i] == a2[j]) { 14218f46ffcaSHong Zhang k = j; nmatch++; 14228f46ffcaSHong Zhang break; 14238f46ffcaSHong Zhang } 14248f46ffcaSHong Zhang } 14258f46ffcaSHong Zhang } 14268f46ffcaSHong Zhang ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr); 14278f46ffcaSHong Zhang ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr); 14288f46ffcaSHong Zhang ierr = PetscFree(a1);CHKERRQ(ierr); 14298f46ffcaSHong Zhang ierr = PetscFree(a2);CHKERRQ(ierr); 14301098a8e8SHong Zhang if (nmatch < sz1) { 14311098a8e8SHong Zhang *flg = PETSC_FALSE; 14321098a8e8SHong Zhang } else { 14331098a8e8SHong Zhang *flg = PETSC_TRUE; 14341098a8e8SHong Zhang } 143536032a97SHong Zhang PetscFunctionReturn(0); 14368f46ffcaSHong Zhang } 143736032a97SHong Zhang 14387dae84e0SHong Zhang PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 143936032a97SHong Zhang { 144036032a97SHong Zhang PetscErrorCode ierr; 144136032a97SHong Zhang IS iscol_local; 144236032a97SHong Zhang PetscInt csize; 144336032a97SHong Zhang PetscBool isequal; 144436032a97SHong Zhang 144536032a97SHong Zhang PetscFunctionBegin; 144636032a97SHong Zhang ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 144736032a97SHong Zhang if (call == MAT_REUSE_MATRIX) { 144836032a97SHong Zhang ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 144936032a97SHong Zhang if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 145036032a97SHong Zhang } else { 145136032a97SHong Zhang ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 145236032a97SHong Zhang ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr); 145336032a97SHong Zhang if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow"); 14548f46ffcaSHong Zhang } 14558f46ffcaSHong Zhang 14567dae84e0SHong Zhang /* now call MatCreateSubMatrix_MPIBAIJ() */ 14577dae84e0SHong Zhang ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 14588f46ffcaSHong Zhang if (call == MAT_INITIAL_MATRIX) { 14598f46ffcaSHong Zhang ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 14608f46ffcaSHong Zhang ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 14618f46ffcaSHong Zhang } 14628f46ffcaSHong Zhang PetscFunctionReturn(0); 14638f46ffcaSHong Zhang } 14648f46ffcaSHong Zhang 1465dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1466a30f8f8cSSatish Balay { 1467a30f8f8cSSatish Balay Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1468dfbe8321SBarry Smith PetscErrorCode ierr; 1469a30f8f8cSSatish Balay 1470a30f8f8cSSatish Balay PetscFunctionBegin; 1471a30f8f8cSSatish Balay ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1472a30f8f8cSSatish Balay ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1473a30f8f8cSSatish Balay PetscFunctionReturn(0); 1474a30f8f8cSSatish Balay } 1475a30f8f8cSSatish Balay 1476dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1477a30f8f8cSSatish Balay { 1478a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1479a30f8f8cSSatish Balay Mat A = a->A,B = a->B; 1480dfbe8321SBarry Smith PetscErrorCode ierr; 14813966268fSBarry Smith PetscLogDouble isend[5],irecv[5]; 1482a30f8f8cSSatish Balay 1483a30f8f8cSSatish Balay PetscFunctionBegin; 1484d0f46423SBarry Smith info->block_size = (PetscReal)matin->rmap->bs; 148526fbe8dcSKarl Rupp 1486a30f8f8cSSatish Balay ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 148726fbe8dcSKarl Rupp 1488a30f8f8cSSatish Balay isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1489a30f8f8cSSatish Balay isend[3] = info->memory; isend[4] = info->mallocs; 149026fbe8dcSKarl Rupp 1491a30f8f8cSSatish Balay ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 149226fbe8dcSKarl Rupp 1493a30f8f8cSSatish Balay isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1494a30f8f8cSSatish Balay isend[3] += info->memory; isend[4] += info->mallocs; 1495a30f8f8cSSatish Balay if (flag == MAT_LOCAL) { 1496a30f8f8cSSatish Balay info->nz_used = isend[0]; 1497a30f8f8cSSatish Balay info->nz_allocated = isend[1]; 1498a30f8f8cSSatish Balay info->nz_unneeded = isend[2]; 1499a30f8f8cSSatish Balay info->memory = isend[3]; 1500a30f8f8cSSatish Balay info->mallocs = isend[4]; 1501a30f8f8cSSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 15023966268fSBarry Smith ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 150326fbe8dcSKarl Rupp 1504a30f8f8cSSatish Balay info->nz_used = irecv[0]; 1505a30f8f8cSSatish Balay info->nz_allocated = irecv[1]; 1506a30f8f8cSSatish Balay info->nz_unneeded = irecv[2]; 1507a30f8f8cSSatish Balay info->memory = irecv[3]; 1508a30f8f8cSSatish Balay info->mallocs = irecv[4]; 1509a30f8f8cSSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 15103966268fSBarry Smith ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 151126fbe8dcSKarl Rupp 1512a30f8f8cSSatish Balay info->nz_used = irecv[0]; 1513a30f8f8cSSatish Balay info->nz_allocated = irecv[1]; 1514a30f8f8cSSatish Balay info->nz_unneeded = irecv[2]; 1515a30f8f8cSSatish Balay info->memory = irecv[3]; 1516a30f8f8cSSatish Balay info->mallocs = irecv[4]; 1517f23aa3ddSBarry Smith } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1518a30f8f8cSSatish Balay info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1519a30f8f8cSSatish Balay info->fill_ratio_needed = 0; 1520a30f8f8cSSatish Balay info->factor_mallocs = 0; 1521a30f8f8cSSatish Balay PetscFunctionReturn(0); 1522a30f8f8cSSatish Balay } 1523a30f8f8cSSatish Balay 1524ace3abfcSBarry Smith PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1525a30f8f8cSSatish Balay { 1526a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1527d0d4cfc2SHong Zhang Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1528dfbe8321SBarry Smith PetscErrorCode ierr; 1529a30f8f8cSSatish Balay 1530a30f8f8cSSatish Balay PetscFunctionBegin; 1531e98b92d7SKris Buschelman switch (op) { 1532512a5fc5SBarry Smith case MAT_NEW_NONZERO_LOCATIONS: 1533e98b92d7SKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 153428b2fa4aSMatthew Knepley case MAT_UNUSED_NONZERO_LOCATION_ERR: 1535a9817697SBarry Smith case MAT_KEEP_NONZERO_PATTERN: 1536c10200c1SHong Zhang case MAT_SUBMAT_SINGLEIS: 1537e98b92d7SKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 153843674050SBarry Smith MatCheckPreallocated(A,1); 15394e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 15404e0d8c25SBarry Smith ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1541e98b92d7SKris Buschelman break; 1542e98b92d7SKris Buschelman case MAT_ROW_ORIENTED: 154343674050SBarry Smith MatCheckPreallocated(A,1); 15444e0d8c25SBarry Smith a->roworiented = flg; 154526fbe8dcSKarl Rupp 15464e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 15474e0d8c25SBarry Smith ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1548e98b92d7SKris Buschelman break; 15494e0d8c25SBarry Smith case MAT_NEW_DIAGONALS: 1550071fcb05SBarry Smith case MAT_SORTED_FULL: 1551290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1552e98b92d7SKris Buschelman break; 1553e98b92d7SKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 15544e0d8c25SBarry Smith a->donotstash = flg; 1555e98b92d7SKris Buschelman break; 1556e98b92d7SKris Buschelman case MAT_USE_HASH_TABLE: 15574e0d8c25SBarry Smith a->ht_flag = flg; 1558e98b92d7SKris Buschelman break; 15599a4540c5SBarry Smith case MAT_HERMITIAN: 156043674050SBarry Smith MatCheckPreallocated(A,1); 1561eeffb40dSHong Zhang ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 15620f2140c7SStefano Zampini #if defined(PETSC_USE_COMPLEX) 1563eb1ec7c1SStefano Zampini if (flg) { /* need different mat-vec ops */ 1564547795f9SHong Zhang A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1565eb1ec7c1SStefano Zampini A->ops->multadd = MatMultAdd_MPISBAIJ_Hermitian; 1566eb1ec7c1SStefano Zampini A->ops->multtranspose = NULL; 1567eb1ec7c1SStefano Zampini A->ops->multtransposeadd = NULL; 1568eb1ec7c1SStefano Zampini A->symmetric = PETSC_FALSE; 1569eb1ec7c1SStefano Zampini } 15700f2140c7SStefano Zampini #endif 1571eeffb40dSHong Zhang break; 1572ffa07934SHong Zhang case MAT_SPD: 157377e54ba9SKris Buschelman case MAT_SYMMETRIC: 157443674050SBarry Smith MatCheckPreallocated(A,1); 1575eeffb40dSHong Zhang ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1576eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX) 1577eb1ec7c1SStefano Zampini if (flg) { /* restore to use default mat-vec ops */ 1578eb1ec7c1SStefano Zampini A->ops->mult = MatMult_MPISBAIJ; 1579eb1ec7c1SStefano Zampini A->ops->multadd = MatMultAdd_MPISBAIJ; 1580eb1ec7c1SStefano Zampini A->ops->multtranspose = MatMult_MPISBAIJ; 1581eb1ec7c1SStefano Zampini A->ops->multtransposeadd = MatMultAdd_MPISBAIJ; 1582eb1ec7c1SStefano Zampini } 1583eb1ec7c1SStefano Zampini #endif 1584eeffb40dSHong Zhang break; 158577e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 158643674050SBarry Smith MatCheckPreallocated(A,1); 1587eeffb40dSHong Zhang ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1588eeffb40dSHong Zhang break; 15899a4540c5SBarry Smith case MAT_SYMMETRY_ETERNAL: 1590e32f2f54SBarry Smith if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1591290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 159277e54ba9SKris Buschelman break; 1593d0d4cfc2SHong Zhang case MAT_IGNORE_LOWER_TRIANGULAR: 15944e0d8c25SBarry Smith aA->ignore_ltriangular = flg; 1595d0d4cfc2SHong Zhang break; 1596d0d4cfc2SHong Zhang case MAT_ERROR_LOWER_TRIANGULAR: 15974e0d8c25SBarry Smith aA->ignore_ltriangular = flg; 1598d0d4cfc2SHong Zhang break; 1599d0d4cfc2SHong Zhang case MAT_GETROW_UPPERTRIANGULAR: 16004e0d8c25SBarry Smith aA->getrow_utriangular = flg; 1601d0d4cfc2SHong Zhang break; 1602e98b92d7SKris Buschelman default: 1603e32f2f54SBarry Smith SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1604a30f8f8cSSatish Balay } 1605a30f8f8cSSatish Balay PetscFunctionReturn(0); 1606a30f8f8cSSatish Balay } 1607a30f8f8cSSatish Balay 1608fc4dec0aSBarry Smith PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1609a30f8f8cSSatish Balay { 1610dfbe8321SBarry Smith PetscErrorCode ierr; 16116e111a19SKarl Rupp 1612a30f8f8cSSatish Balay PetscFunctionBegin; 1613cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX) { 1614999d9058SBarry Smith ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1615cf37664fSBarry Smith } else if (reuse == MAT_REUSE_MATRIX) { 1616cf37664fSBarry Smith ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1617fc4dec0aSBarry Smith } 16188115998fSBarry Smith PetscFunctionReturn(0); 1619a30f8f8cSSatish Balay } 1620a30f8f8cSSatish Balay 1621dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1622a30f8f8cSSatish Balay { 1623a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1624a30f8f8cSSatish Balay Mat a = baij->A, b=baij->B; 1625dfbe8321SBarry Smith PetscErrorCode ierr; 16265e90f9d9SHong Zhang PetscInt nv,m,n; 1627ace3abfcSBarry Smith PetscBool flg; 1628a30f8f8cSSatish Balay 1629a30f8f8cSSatish Balay PetscFunctionBegin; 1630a30f8f8cSSatish Balay if (ll != rr) { 1631b3bf805bSHong Zhang ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1632e7e72b3dSBarry Smith if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1633a30f8f8cSSatish Balay } 1634b3bf805bSHong Zhang if (!ll) PetscFunctionReturn(0); 1635b3bf805bSHong Zhang 16365e90f9d9SHong Zhang ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1637e32f2f54SBarry Smith if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1638b3bf805bSHong Zhang 16395e90f9d9SHong Zhang ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1640e32f2f54SBarry Smith if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 16415e90f9d9SHong Zhang 1642ca9f406cSSatish Balay ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 16435e90f9d9SHong Zhang 16445e90f9d9SHong Zhang /* left diagonalscale the off-diagonal part */ 16450298fd71SBarry Smith ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 16465e90f9d9SHong Zhang 16475e90f9d9SHong Zhang /* scale the diagonal part */ 1648a30f8f8cSSatish Balay ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1649a30f8f8cSSatish Balay 16505e90f9d9SHong Zhang /* right diagonalscale the off-diagonal part */ 1651ca9f406cSSatish Balay ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 16520298fd71SBarry Smith ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1653a30f8f8cSSatish Balay PetscFunctionReturn(0); 1654a30f8f8cSSatish Balay } 1655a30f8f8cSSatish Balay 1656dfbe8321SBarry Smith PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1657a30f8f8cSSatish Balay { 1658f3566a2aSHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1659dfbe8321SBarry Smith PetscErrorCode ierr; 1660a30f8f8cSSatish Balay 1661a30f8f8cSSatish Balay PetscFunctionBegin; 1662a30f8f8cSSatish Balay ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1663a30f8f8cSSatish Balay PetscFunctionReturn(0); 1664a30f8f8cSSatish Balay } 1665a30f8f8cSSatish Balay 16666849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1667a30f8f8cSSatish Balay 1668ace3abfcSBarry Smith PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1669a30f8f8cSSatish Balay { 1670a30f8f8cSSatish Balay Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1671a30f8f8cSSatish Balay Mat a,b,c,d; 1672ace3abfcSBarry Smith PetscBool flg; 1673dfbe8321SBarry Smith PetscErrorCode ierr; 1674a30f8f8cSSatish Balay 1675a30f8f8cSSatish Balay PetscFunctionBegin; 1676a30f8f8cSSatish Balay a = matA->A; b = matA->B; 1677a30f8f8cSSatish Balay c = matB->A; d = matB->B; 1678a30f8f8cSSatish Balay 1679a30f8f8cSSatish Balay ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1680abc0a331SBarry Smith if (flg) { 1681a30f8f8cSSatish Balay ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1682a30f8f8cSSatish Balay } 1683b2566f29SBarry Smith ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1684a30f8f8cSSatish Balay PetscFunctionReturn(0); 1685a30f8f8cSSatish Balay } 1686a30f8f8cSSatish Balay 16873c896bc6SHong Zhang PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 16883c896bc6SHong Zhang { 16893c896bc6SHong Zhang PetscErrorCode ierr; 16904c7a3774SStefano Zampini PetscBool isbaij; 16913c896bc6SHong Zhang 16923c896bc6SHong Zhang PetscFunctionBegin; 16934c7a3774SStefano Zampini ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr); 16944c7a3774SStefano Zampini if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name); 16953c896bc6SHong Zhang /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 16963c896bc6SHong Zhang if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1697d0d4cfc2SHong Zhang ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 16983c896bc6SHong Zhang ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1699d0d4cfc2SHong Zhang ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 17003c896bc6SHong Zhang } else { 17014c7a3774SStefano Zampini Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 17024c7a3774SStefano Zampini Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 17034c7a3774SStefano Zampini 17043c896bc6SHong Zhang ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 17053c896bc6SHong Zhang ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 17063c896bc6SHong Zhang } 1707cdc753b6SBarry Smith ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 17083c896bc6SHong Zhang PetscFunctionReturn(0); 17093c896bc6SHong Zhang } 17103c896bc6SHong Zhang 17114994cf47SJed Brown PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1712273d9f13SBarry Smith { 1713dfbe8321SBarry Smith PetscErrorCode ierr; 1714273d9f13SBarry Smith 1715273d9f13SBarry Smith PetscFunctionBegin; 1716535b19f3SBarry Smith ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1717273d9f13SBarry Smith PetscFunctionReturn(0); 1718273d9f13SBarry Smith } 1719a5e6ed63SBarry Smith 17204fe895cdSHong Zhang PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 17214fe895cdSHong Zhang { 17224fe895cdSHong Zhang PetscErrorCode ierr; 17234fe895cdSHong Zhang Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 17244fe895cdSHong Zhang PetscBLASInt bnz,one=1; 17254fe895cdSHong Zhang Mat_SeqSBAIJ *xa,*ya; 17264fe895cdSHong Zhang Mat_SeqBAIJ *xb,*yb; 17274fe895cdSHong Zhang 17284fe895cdSHong Zhang PetscFunctionBegin; 17294fe895cdSHong Zhang if (str == SAME_NONZERO_PATTERN) { 17304fe895cdSHong Zhang PetscScalar alpha = a; 17314fe895cdSHong Zhang xa = (Mat_SeqSBAIJ*)xx->A->data; 17324fe895cdSHong Zhang ya = (Mat_SeqSBAIJ*)yy->A->data; 1733c5df96a5SBarry Smith ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 17348b83055fSJed Brown PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 17354fe895cdSHong Zhang xb = (Mat_SeqBAIJ*)xx->B->data; 17364fe895cdSHong Zhang yb = (Mat_SeqBAIJ*)yy->B->data; 1737c5df96a5SBarry Smith ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 17388b83055fSJed Brown PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1739a3fa217bSJose E. Roman ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1740ab784542SHong Zhang } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1741ab784542SHong Zhang ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1742ab784542SHong Zhang ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1743ab784542SHong Zhang ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 17444fe895cdSHong Zhang } else { 17454de5dceeSHong Zhang Mat B; 17464de5dceeSHong Zhang PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 17474de5dceeSHong Zhang if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1748d0d4cfc2SHong Zhang ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 17494de5dceeSHong Zhang ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 17504de5dceeSHong Zhang ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 17514de5dceeSHong Zhang ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 17524de5dceeSHong Zhang ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 17534de5dceeSHong Zhang ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 17544de5dceeSHong Zhang ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 17554de5dceeSHong Zhang ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 17564de5dceeSHong Zhang ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 17574de5dceeSHong Zhang ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 17584de5dceeSHong Zhang ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 17594de5dceeSHong Zhang ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 17604de5dceeSHong Zhang ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 176128be2f97SBarry Smith ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 17624de5dceeSHong Zhang ierr = PetscFree(nnz_d);CHKERRQ(ierr); 17634de5dceeSHong Zhang ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1764d0d4cfc2SHong Zhang ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 17654de5dceeSHong Zhang ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 17664fe895cdSHong Zhang } 17674fe895cdSHong Zhang PetscFunctionReturn(0); 17684fe895cdSHong Zhang } 17694fe895cdSHong Zhang 17707dae84e0SHong Zhang PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1771a5e6ed63SBarry Smith { 17726849ba73SBarry Smith PetscErrorCode ierr; 17731302d50aSBarry Smith PetscInt i; 1774afebec48SHong Zhang PetscBool flg; 1775a5e6ed63SBarry Smith 17766849ba73SBarry Smith PetscFunctionBegin; 17777dae84e0SHong Zhang ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */ 1778a5e6ed63SBarry Smith for (i=0; i<n; i++) { 1779a5e6ed63SBarry Smith ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1780afebec48SHong Zhang if (!flg) { 1781b2fa50c1SHong Zhang ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr); 1782a5e6ed63SBarry Smith } 17834dcd73b1SHong Zhang } 1784a5e6ed63SBarry Smith PetscFunctionReturn(0); 1785a5e6ed63SBarry Smith } 1786a5e6ed63SBarry Smith 17877d68702bSBarry Smith PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a) 17887d68702bSBarry Smith { 17897d68702bSBarry Smith PetscErrorCode ierr; 17907d68702bSBarry Smith Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data; 17916f33a894SBarry Smith Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data; 17927d68702bSBarry Smith 17937d68702bSBarry Smith PetscFunctionBegin; 17946f33a894SBarry Smith if (!Y->preallocated) { 17957d68702bSBarry Smith ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 17966f33a894SBarry Smith } else if (!aij->nz) { 1797b83222d8SBarry Smith PetscInt nonew = aij->nonew; 17986f33a894SBarry Smith ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 1799b83222d8SBarry Smith aij->nonew = nonew; 18007d68702bSBarry Smith } 18017d68702bSBarry Smith ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 18027d68702bSBarry Smith PetscFunctionReturn(0); 18037d68702bSBarry Smith } 18047d68702bSBarry Smith 18053b49f96aSBarry Smith PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d) 18063b49f96aSBarry Smith { 18073b49f96aSBarry Smith Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 18083b49f96aSBarry Smith PetscErrorCode ierr; 18093b49f96aSBarry Smith 18103b49f96aSBarry Smith PetscFunctionBegin; 18113b49f96aSBarry Smith if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 18123b49f96aSBarry Smith ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 18133b49f96aSBarry Smith if (d) { 18143b49f96aSBarry Smith PetscInt rstart; 18153b49f96aSBarry Smith ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 18163b49f96aSBarry Smith *d += rstart/A->rmap->bs; 18173b49f96aSBarry Smith 18183b49f96aSBarry Smith } 18193b49f96aSBarry Smith PetscFunctionReturn(0); 18203b49f96aSBarry Smith } 18213b49f96aSBarry Smith 1822a5b7ff6bSBarry Smith PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1823a5b7ff6bSBarry Smith { 1824a5b7ff6bSBarry Smith PetscFunctionBegin; 1825a5b7ff6bSBarry Smith *a = ((Mat_MPISBAIJ*)A->data)->A; 1826a5b7ff6bSBarry Smith PetscFunctionReturn(0); 1827a5b7ff6bSBarry Smith } 18283b49f96aSBarry Smith 1829a30f8f8cSSatish Balay /* -------------------------------------------------------------------*/ 18303964eb88SJed Brown static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1831a30f8f8cSSatish Balay MatGetRow_MPISBAIJ, 1832a30f8f8cSSatish Balay MatRestoreRow_MPISBAIJ, 1833a9d4b620SHong Zhang MatMult_MPISBAIJ, 183497304618SKris Buschelman /* 4*/ MatMultAdd_MPISBAIJ, 1835431c96f7SBarry Smith MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1836431c96f7SBarry Smith MatMultAdd_MPISBAIJ, 1837a30f8f8cSSatish Balay 0, 1838a30f8f8cSSatish Balay 0, 1839a30f8f8cSSatish Balay 0, 184097304618SKris Buschelman /* 10*/ 0, 1841a30f8f8cSSatish Balay 0, 1842a30f8f8cSSatish Balay 0, 184341f059aeSBarry Smith MatSOR_MPISBAIJ, 1844a30f8f8cSSatish Balay MatTranspose_MPISBAIJ, 184597304618SKris Buschelman /* 15*/ MatGetInfo_MPISBAIJ, 1846a30f8f8cSSatish Balay MatEqual_MPISBAIJ, 1847a30f8f8cSSatish Balay MatGetDiagonal_MPISBAIJ, 1848a30f8f8cSSatish Balay MatDiagonalScale_MPISBAIJ, 1849a30f8f8cSSatish Balay MatNorm_MPISBAIJ, 185097304618SKris Buschelman /* 20*/ MatAssemblyBegin_MPISBAIJ, 1851a30f8f8cSSatish Balay MatAssemblyEnd_MPISBAIJ, 1852a30f8f8cSSatish Balay MatSetOption_MPISBAIJ, 1853a30f8f8cSSatish Balay MatZeroEntries_MPISBAIJ, 1854d519adbfSMatthew Knepley /* 24*/ 0, 1855a30f8f8cSSatish Balay 0, 1856a30f8f8cSSatish Balay 0, 1857a30f8f8cSSatish Balay 0, 1858a30f8f8cSSatish Balay 0, 18594994cf47SJed Brown /* 29*/ MatSetUp_MPISBAIJ, 1860b5df2d14SHong Zhang 0, 1861a30f8f8cSSatish Balay 0, 1862a5b7ff6bSBarry Smith MatGetDiagonalBlock_MPISBAIJ, 1863a30f8f8cSSatish Balay 0, 1864d519adbfSMatthew Knepley /* 34*/ MatDuplicate_MPISBAIJ, 1865a30f8f8cSSatish Balay 0, 1866a30f8f8cSSatish Balay 0, 1867a30f8f8cSSatish Balay 0, 1868a30f8f8cSSatish Balay 0, 1869d519adbfSMatthew Knepley /* 39*/ MatAXPY_MPISBAIJ, 18707dae84e0SHong Zhang MatCreateSubMatrices_MPISBAIJ, 1871d94109b8SHong Zhang MatIncreaseOverlap_MPISBAIJ, 1872a30f8f8cSSatish Balay MatGetValues_MPISBAIJ, 18733c896bc6SHong Zhang MatCopy_MPISBAIJ, 1874d519adbfSMatthew Knepley /* 44*/ 0, 1875a30f8f8cSSatish Balay MatScale_MPISBAIJ, 18767d68702bSBarry Smith MatShift_MPISBAIJ, 1877a30f8f8cSSatish Balay 0, 1878a30f8f8cSSatish Balay 0, 1879f73d5cc4SBarry Smith /* 49*/ 0, 1880a30f8f8cSSatish Balay 0, 1881a30f8f8cSSatish Balay 0, 1882a30f8f8cSSatish Balay 0, 1883a30f8f8cSSatish Balay 0, 1884d519adbfSMatthew Knepley /* 54*/ 0, 1885a30f8f8cSSatish Balay 0, 1886a30f8f8cSSatish Balay MatSetUnfactored_MPISBAIJ, 1887a30f8f8cSSatish Balay 0, 1888a30f8f8cSSatish Balay MatSetValuesBlocked_MPISBAIJ, 18897dae84e0SHong Zhang /* 59*/ MatCreateSubMatrix_MPISBAIJ, 1890a30f8f8cSSatish Balay 0, 1891a30f8f8cSSatish Balay 0, 1892357abbc8SBarry Smith 0, 189324d5174aSHong Zhang 0, 1894d519adbfSMatthew Knepley /* 64*/ 0, 189524d5174aSHong Zhang 0, 189624d5174aSHong Zhang 0, 189724d5174aSHong Zhang 0, 189824d5174aSHong Zhang 0, 1899d519adbfSMatthew Knepley /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 190024d5174aSHong Zhang 0, 190128d58a37SPierre Jolivet MatConvert_MPISBAIJ_Basic, 190297304618SKris Buschelman 0, 190397304618SKris Buschelman 0, 1904d519adbfSMatthew Knepley /* 74*/ 0, 190597304618SKris Buschelman 0, 190697304618SKris Buschelman 0, 190797304618SKris Buschelman 0, 190897304618SKris Buschelman 0, 1909d519adbfSMatthew Knepley /* 79*/ 0, 191097304618SKris Buschelman 0, 191197304618SKris Buschelman 0, 191297304618SKris Buschelman 0, 19135bba2384SShri Abhyankar MatLoad_MPISBAIJ, 1914d519adbfSMatthew Knepley /* 84*/ 0, 1915865e5f61SKris Buschelman 0, 1916865e5f61SKris Buschelman 0, 1917865e5f61SKris Buschelman 0, 1918865e5f61SKris Buschelman 0, 1919d519adbfSMatthew Knepley /* 89*/ 0, 1920865e5f61SKris Buschelman 0, 1921865e5f61SKris Buschelman 0, 1922865e5f61SKris Buschelman 0, 1923865e5f61SKris Buschelman 0, 1924d519adbfSMatthew Knepley /* 94*/ 0, 1925865e5f61SKris Buschelman 0, 1926865e5f61SKris Buschelman 0, 192799cafbc1SBarry Smith 0, 192899cafbc1SBarry Smith 0, 1929d519adbfSMatthew Knepley /* 99*/ 0, 193099cafbc1SBarry Smith 0, 193199cafbc1SBarry Smith 0, 193299cafbc1SBarry Smith 0, 193399cafbc1SBarry Smith 0, 1934d519adbfSMatthew Knepley /*104*/ 0, 193599cafbc1SBarry Smith MatRealPart_MPISBAIJ, 1936d0d4cfc2SHong Zhang MatImaginaryPart_MPISBAIJ, 1937d0d4cfc2SHong Zhang MatGetRowUpperTriangular_MPISBAIJ, 193895936485SShri Abhyankar MatRestoreRowUpperTriangular_MPISBAIJ, 193995936485SShri Abhyankar /*109*/ 0, 194095936485SShri Abhyankar 0, 194195936485SShri Abhyankar 0, 194295936485SShri Abhyankar 0, 19433b49f96aSBarry Smith MatMissingDiagonal_MPISBAIJ, 194495936485SShri Abhyankar /*114*/ 0, 194595936485SShri Abhyankar 0, 194695936485SShri Abhyankar 0, 194795936485SShri Abhyankar 0, 194895936485SShri Abhyankar 0, 194995936485SShri Abhyankar /*119*/ 0, 195095936485SShri Abhyankar 0, 195195936485SShri Abhyankar 0, 19523964eb88SJed Brown 0, 19533964eb88SJed Brown 0, 19543964eb88SJed Brown /*124*/ 0, 19553964eb88SJed Brown 0, 19563964eb88SJed Brown 0, 19573964eb88SJed Brown 0, 19583964eb88SJed Brown 0, 19593964eb88SJed Brown /*129*/ 0, 19603964eb88SJed Brown 0, 19613964eb88SJed Brown 0, 19623964eb88SJed Brown 0, 19633964eb88SJed Brown 0, 19643964eb88SJed Brown /*134*/ 0, 19653964eb88SJed Brown 0, 19663964eb88SJed Brown 0, 19673964eb88SJed Brown 0, 19683964eb88SJed Brown 0, 196946533700Sstefano_zampini /*139*/ MatSetBlockSizes_Default, 1970f9426fe0SMark Adams 0, 197159f5e6ceSHong Zhang 0, 197259f5e6ceSHong Zhang 0, 197359f5e6ceSHong Zhang 0, 197459f5e6ceSHong Zhang /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 197599cafbc1SBarry Smith }; 1976a30f8f8cSSatish Balay 1977b2573a8aSBarry Smith PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 1978a23d5eceSKris Buschelman { 1979476417e5SBarry Smith Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1980dfbe8321SBarry Smith PetscErrorCode ierr; 1981535b19f3SBarry Smith PetscInt i,mbs,Mbs; 19825d2a9ed1SStefano Zampini PetscMPIInt size; 1983a23d5eceSKris Buschelman 1984a23d5eceSKris Buschelman PetscFunctionBegin; 198533d57670SJed Brown ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 198626283091SBarry Smith ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 198726283091SBarry Smith ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1988e02043d6SBarry Smith ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1989476417e5SBarry Smith if (B->rmap->N > B->cmap->N) SETERRQ2(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %D than columns %D",B->rmap->N,B->cmap->N); 1990476417e5SBarry Smith if (B->rmap->n > B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %D than columns %D",B->rmap->n,B->cmap->n); 1991899cda47SBarry Smith 1992d0f46423SBarry Smith mbs = B->rmap->n/bs; 1993d0f46423SBarry Smith Mbs = B->rmap->N/bs; 1994c2fc9fa9SBarry Smith if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1995a23d5eceSKris Buschelman 1996d0f46423SBarry Smith B->rmap->bs = bs; 1997a23d5eceSKris Buschelman b->bs2 = bs*bs; 1998a23d5eceSKris Buschelman b->mbs = mbs; 1999a23d5eceSKris Buschelman b->Mbs = Mbs; 2000de64b629SHong Zhang b->nbs = B->cmap->n/bs; 2001de64b629SHong Zhang b->Nbs = B->cmap->N/bs; 2002a23d5eceSKris Buschelman 2003a23d5eceSKris Buschelman for (i=0; i<=b->size; i++) { 2004d0f46423SBarry Smith b->rangebs[i] = B->rmap->range[i]/bs; 2005a23d5eceSKris Buschelman } 2006d0f46423SBarry Smith b->rstartbs = B->rmap->rstart/bs; 2007d0f46423SBarry Smith b->rendbs = B->rmap->rend/bs; 2008a23d5eceSKris Buschelman 2009d0f46423SBarry Smith b->cstartbs = B->cmap->rstart/bs; 2010d0f46423SBarry Smith b->cendbs = B->cmap->rend/bs; 2011a23d5eceSKris Buschelman 2012cb7b82ddSBarry Smith #if defined(PETSC_USE_CTABLE) 2013cb7b82ddSBarry Smith ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2014cb7b82ddSBarry Smith #else 2015cb7b82ddSBarry Smith ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2016cb7b82ddSBarry Smith #endif 2017cb7b82ddSBarry Smith ierr = PetscFree(b->garray);CHKERRQ(ierr); 2018cb7b82ddSBarry Smith ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2019cb7b82ddSBarry Smith ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2020cb7b82ddSBarry Smith ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr); 2021cb7b82ddSBarry Smith ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr); 2022cb7b82ddSBarry Smith ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr); 2023cb7b82ddSBarry Smith ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr); 2024cb7b82ddSBarry Smith ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr); 2025cb7b82ddSBarry Smith ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr); 2026cb7b82ddSBarry Smith 2027cb7b82ddSBarry Smith /* Because the B will have been resized we simply destroy it and create a new one each time */ 20285d2a9ed1SStefano Zampini ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2029cb7b82ddSBarry Smith ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2030cb7b82ddSBarry Smith ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 20315d2a9ed1SStefano Zampini ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr); 2032cb7b82ddSBarry Smith ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2033cb7b82ddSBarry Smith ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2034cb7b82ddSBarry Smith 2035526dfc15SBarry Smith if (!B->preallocated) { 2036f69a0ea3SMatthew Knepley ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2037d0f46423SBarry Smith ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 20389c097c71SKris Buschelman ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 20393bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2040ce94432eSBarry Smith ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2041526dfc15SBarry Smith } 2042a23d5eceSKris Buschelman 2043526dfc15SBarry Smith ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2044526dfc15SBarry Smith ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 204526fbe8dcSKarl Rupp 2046526dfc15SBarry Smith B->preallocated = PETSC_TRUE; 2047cb7b82ddSBarry Smith B->was_assembled = PETSC_FALSE; 2048cb7b82ddSBarry Smith B->assembled = PETSC_FALSE; 2049a23d5eceSKris Buschelman PetscFunctionReturn(0); 2050a23d5eceSKris Buschelman } 2051a23d5eceSKris Buschelman 2052dfb205c3SBarry Smith PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2053dfb205c3SBarry Smith { 205402106b30SBarry Smith PetscInt m,rstart,cend; 20550cd7f59aSBarry Smith PetscInt i,j,d,nz,bd, nz_max=0,*d_nnz=0,*o_nnz=0; 2056dfb205c3SBarry Smith const PetscInt *JJ =0; 2057dfb205c3SBarry Smith PetscScalar *values=0; 2058bb80cfbbSStefano Zampini PetscBool roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented; 2059dfb205c3SBarry Smith PetscErrorCode ierr; 20603bd0feecSPierre Jolivet PetscBool nooffprocentries; 2061dfb205c3SBarry Smith 2062dfb205c3SBarry Smith PetscFunctionBegin; 2063ce94432eSBarry Smith if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2064dfb205c3SBarry Smith ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2065dfb205c3SBarry Smith ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2066dfb205c3SBarry Smith ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2067dfb205c3SBarry Smith ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2068e02043d6SBarry Smith ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2069dfb205c3SBarry Smith m = B->rmap->n/bs; 2070dfb205c3SBarry Smith rstart = B->rmap->rstart/bs; 2071dfb205c3SBarry Smith cend = B->cmap->rend/bs; 2072dfb205c3SBarry Smith 2073dfb205c3SBarry Smith if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2074dcca6d9dSJed Brown ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2075dfb205c3SBarry Smith for (i=0; i<m; i++) { 2076dfb205c3SBarry Smith nz = ii[i+1] - ii[i]; 2077dfb205c3SBarry Smith if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 20780cd7f59aSBarry Smith /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */ 2079dfb205c3SBarry Smith JJ = jj + ii[i]; 20800cd7f59aSBarry Smith bd = 0; 2081dfb205c3SBarry Smith for (j=0; j<nz; j++) { 20820cd7f59aSBarry Smith if (*JJ >= i + rstart) break; 2083dfb205c3SBarry Smith JJ++; 20840cd7f59aSBarry Smith bd++; 2085dfb205c3SBarry Smith } 2086dfb205c3SBarry Smith d = 0; 2087dfb205c3SBarry Smith for (; j<nz; j++) { 2088dfb205c3SBarry Smith if (*JJ++ >= cend) break; 2089dfb205c3SBarry Smith d++; 2090dfb205c3SBarry Smith } 2091dfb205c3SBarry Smith d_nnz[i] = d; 20920cd7f59aSBarry Smith o_nnz[i] = nz - d - bd; 20930cd7f59aSBarry Smith nz = nz - bd; 20940cd7f59aSBarry Smith nz_max = PetscMax(nz_max,nz); 2095dfb205c3SBarry Smith } 2096dfb205c3SBarry Smith ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2097bb80cfbbSStefano Zampini ierr = MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2098dfb205c3SBarry Smith ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2099dfb205c3SBarry Smith 2100dfb205c3SBarry Smith values = (PetscScalar*)V; 2101dfb205c3SBarry Smith if (!values) { 2102580bdb30SBarry Smith ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2103dfb205c3SBarry Smith } 2104dfb205c3SBarry Smith for (i=0; i<m; i++) { 2105dfb205c3SBarry Smith PetscInt row = i + rstart; 2106dfb205c3SBarry Smith PetscInt ncols = ii[i+1] - ii[i]; 2107dfb205c3SBarry Smith const PetscInt *icols = jj + ii[i]; 2108bb80cfbbSStefano Zampini if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2109dfb205c3SBarry Smith const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2110dfb205c3SBarry Smith ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2111bb80cfbbSStefano Zampini } else { /* block ordering does not match so we can only insert one block at a time. */ 2112bb80cfbbSStefano Zampini PetscInt j; 21130cd7f59aSBarry Smith for (j=0; j<ncols; j++) { 21140cd7f59aSBarry Smith const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 21150cd7f59aSBarry Smith ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 21160cd7f59aSBarry Smith } 21170cd7f59aSBarry Smith } 2118dfb205c3SBarry Smith } 2119dfb205c3SBarry Smith 2120dfb205c3SBarry Smith if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 21213bd0feecSPierre Jolivet nooffprocentries = B->nooffprocentries; 21223bd0feecSPierre Jolivet B->nooffprocentries = PETSC_TRUE; 2123dfb205c3SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2124dfb205c3SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 21253bd0feecSPierre Jolivet B->nooffprocentries = nooffprocentries; 21263bd0feecSPierre Jolivet 21277827cd58SJed Brown ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2128dfb205c3SBarry Smith PetscFunctionReturn(0); 2129dfb205c3SBarry Smith } 2130dfb205c3SBarry Smith 21310bad9183SKris Buschelman /*MC 2132fafad747SKris Buschelman MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2133828413b8SBarry Smith based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2134828413b8SBarry Smith the matrix is stored. 2135828413b8SBarry Smith 2136828413b8SBarry Smith For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2137828413b8SBarry Smith can call MatSetOption(Mat, MAT_HERMITIAN); 21380bad9183SKris Buschelman 21390bad9183SKris Buschelman Options Database Keys: 21400bad9183SKris Buschelman . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 21410bad9183SKris Buschelman 2142476417e5SBarry Smith Notes: 2143476417e5SBarry Smith The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the 2144476417e5SBarry Smith diagonal portion of the matrix of each process has to less than or equal the number of columns. 2145476417e5SBarry Smith 21460bad9183SKris Buschelman Level: beginner 21470bad9183SKris Buschelman 2148fd292e60Sprj- .seealso: MatCreateBAIJ(), MATSEQSBAIJ, MatType 21490bad9183SKris Buschelman M*/ 21500bad9183SKris Buschelman 21518cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2152b5df2d14SHong Zhang { 2153b5df2d14SHong Zhang Mat_MPISBAIJ *b; 2154dfbe8321SBarry Smith PetscErrorCode ierr; 215594ae4db5SBarry Smith PetscBool flg = PETSC_FALSE; 2156b5df2d14SHong Zhang 2157b5df2d14SHong Zhang PetscFunctionBegin; 2158b00a9115SJed Brown ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2159b0a32e0cSBarry Smith B->data = (void*)b; 2160b5df2d14SHong Zhang ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2161b5df2d14SHong Zhang 2162b5df2d14SHong Zhang B->ops->destroy = MatDestroy_MPISBAIJ; 2163b5df2d14SHong Zhang B->ops->view = MatView_MPISBAIJ; 2164b5df2d14SHong Zhang B->assembled = PETSC_FALSE; 2165b5df2d14SHong Zhang B->insertmode = NOT_SET_VALUES; 216626fbe8dcSKarl Rupp 2167ce94432eSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2168ce94432eSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2169b5df2d14SHong Zhang 2170b5df2d14SHong Zhang /* build local table of row and column ownerships */ 2171854ce69bSBarry Smith ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 2172b5df2d14SHong Zhang 2173b5df2d14SHong Zhang /* build cache for off array entries formed */ 2174ce94432eSBarry Smith ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 217526fbe8dcSKarl Rupp 2176b5df2d14SHong Zhang b->donotstash = PETSC_FALSE; 21770298fd71SBarry Smith b->colmap = NULL; 21780298fd71SBarry Smith b->garray = NULL; 2179b5df2d14SHong Zhang b->roworiented = PETSC_TRUE; 2180b5df2d14SHong Zhang 2181b5df2d14SHong Zhang /* stuff used in block assembly */ 2182b5df2d14SHong Zhang b->barray = 0; 2183b5df2d14SHong Zhang 2184b5df2d14SHong Zhang /* stuff used for matrix vector multiply */ 2185b5df2d14SHong Zhang b->lvec = 0; 2186b5df2d14SHong Zhang b->Mvctx = 0; 218740781036SHong Zhang b->slvec0 = 0; 218840781036SHong Zhang b->slvec0b = 0; 218940781036SHong Zhang b->slvec1 = 0; 219040781036SHong Zhang b->slvec1a = 0; 219140781036SHong Zhang b->slvec1b = 0; 219240781036SHong Zhang b->sMvctx = 0; 2193b5df2d14SHong Zhang 2194b5df2d14SHong Zhang /* stuff for MatGetRow() */ 2195b5df2d14SHong Zhang b->rowindices = 0; 2196b5df2d14SHong Zhang b->rowvalues = 0; 2197b5df2d14SHong Zhang b->getrowactive = PETSC_FALSE; 2198b5df2d14SHong Zhang 2199b5df2d14SHong Zhang /* hash table stuff */ 2200b5df2d14SHong Zhang b->ht = 0; 2201b5df2d14SHong Zhang b->hd = 0; 2202b5df2d14SHong Zhang b->ht_size = 0; 2203b5df2d14SHong Zhang b->ht_flag = PETSC_FALSE; 2204b5df2d14SHong Zhang b->ht_fact = 0; 2205b5df2d14SHong Zhang b->ht_total_ct = 0; 2206b5df2d14SHong Zhang b->ht_insert_ct = 0; 2207b5df2d14SHong Zhang 22087dae84e0SHong Zhang /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 22097a868f3eSHong Zhang b->ijonly = PETSC_FALSE; 22107a868f3eSHong Zhang 221159ffdab8SBarry Smith b->in_loc = 0; 221259ffdab8SBarry Smith b->v_loc = 0; 221359ffdab8SBarry Smith b->n_loc = 0; 221494ae4db5SBarry Smith 2215bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 2216bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 2217bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 2218bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 22196214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 22206214f412SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 22216214f412SHong Zhang #endif 2222*d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK) 2223*d24d4204SJose E. Roman ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_scalapack_C",MatConvert_SBAIJ_ScaLAPACK);CHKERRQ(ierr); 2224*d24d4204SJose E. Roman #endif 222528d58a37SPierre Jolivet ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr); 222628d58a37SPierre Jolivet ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr); 2227aa5a9175SDahai Guo 222823ce1328SBarry Smith B->symmetric = PETSC_TRUE; 222923ce1328SBarry Smith B->structurally_symmetric = PETSC_TRUE; 223023ce1328SBarry Smith B->symmetric_set = PETSC_TRUE; 223123ce1328SBarry Smith B->structurally_symmetric_set = PETSC_TRUE; 22329899f194SHong Zhang B->symmetric_eternal = PETSC_TRUE; 2233eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX) 223413647f61SHong Zhang B->hermitian = PETSC_FALSE; 223513647f61SHong Zhang B->hermitian_set = PETSC_FALSE; 2236eb1ec7c1SStefano Zampini #else 2237eb1ec7c1SStefano Zampini B->hermitian = PETSC_TRUE; 2238eb1ec7c1SStefano Zampini B->hermitian_set = PETSC_TRUE; 2239eb1ec7c1SStefano Zampini #endif 224013647f61SHong Zhang 224117667f90SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 224294ae4db5SBarry Smith ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 224394ae4db5SBarry Smith ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 224494ae4db5SBarry Smith if (flg) { 224594ae4db5SBarry Smith PetscReal fact = 1.39; 224694ae4db5SBarry Smith ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 224794ae4db5SBarry Smith ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 224894ae4db5SBarry Smith if (fact <= 1.0) fact = 1.39; 224994ae4db5SBarry Smith ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 225094ae4db5SBarry Smith ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 225194ae4db5SBarry Smith } 225294ae4db5SBarry Smith ierr = PetscOptionsEnd();CHKERRQ(ierr); 2253b5df2d14SHong Zhang PetscFunctionReturn(0); 2254b5df2d14SHong Zhang } 2255b5df2d14SHong Zhang 2256209238afSKris Buschelman /*MC 2257002d173eSKris Buschelman MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2258209238afSKris Buschelman 2259209238afSKris Buschelman This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 2260209238afSKris Buschelman and MATMPISBAIJ otherwise. 2261209238afSKris Buschelman 2262209238afSKris Buschelman Options Database Keys: 2263209238afSKris Buschelman . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2264209238afSKris Buschelman 2265209238afSKris Buschelman Level: beginner 2266209238afSKris Buschelman 2267209238afSKris Buschelman .seealso: MatCreateMPISBAIJ, MATSEQSBAIJ, MATMPISBAIJ 2268209238afSKris Buschelman M*/ 2269209238afSKris Buschelman 2270b5df2d14SHong Zhang /*@C 2271b5df2d14SHong Zhang MatMPISBAIJSetPreallocation - For good matrix assembly performance 2272b5df2d14SHong Zhang the user should preallocate the matrix storage by setting the parameters 2273b5df2d14SHong Zhang d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2274b5df2d14SHong Zhang performance can be increased by more than a factor of 50. 2275b5df2d14SHong Zhang 2276b5df2d14SHong Zhang Collective on Mat 2277b5df2d14SHong Zhang 2278b5df2d14SHong Zhang Input Parameters: 22791c4f3114SJed Brown + B - the matrix 2280bb7ae925SBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2281bb7ae925SBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2282b5df2d14SHong Zhang . d_nz - number of block nonzeros per block row in diagonal portion of local 2283b5df2d14SHong Zhang submatrix (same for all local rows) 2284b5df2d14SHong Zhang . d_nnz - array containing the number of block nonzeros in the various block rows 22856d10fdaeSSatish Balay in the upper triangular and diagonal part of the in diagonal portion of the local 22860298fd71SBarry Smith (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 228795742e49SBarry Smith for the diagonal entry and set a value even if it is zero. 2288b5df2d14SHong Zhang . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2289b5df2d14SHong Zhang submatrix (same for all local rows). 2290b5df2d14SHong Zhang - o_nnz - array containing the number of nonzeros in the various block rows of the 2291c2fc9fa9SBarry Smith off-diagonal portion of the local submatrix that is right of the diagonal 22920298fd71SBarry Smith (possibly different for each block row) or NULL. 2293b5df2d14SHong Zhang 2294b5df2d14SHong Zhang 2295b5df2d14SHong Zhang Options Database Keys: 2296a2b725a8SWilliam Gropp + -mat_no_unroll - uses code that does not unroll the loops in the 2297b5df2d14SHong Zhang block calculations (much slower) 2298a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use 2299b5df2d14SHong Zhang 2300b5df2d14SHong Zhang Notes: 2301b5df2d14SHong Zhang 2302b5df2d14SHong Zhang If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2303b5df2d14SHong Zhang than it must be used on all processors that share the object for that argument. 2304b5df2d14SHong Zhang 230549a6f317SBarry Smith If the *_nnz parameter is given then the *_nz parameter is ignored 230649a6f317SBarry Smith 2307b5df2d14SHong Zhang Storage Information: 2308b5df2d14SHong Zhang For a square global matrix we define each processor's diagonal portion 2309b5df2d14SHong Zhang to be its local rows and the corresponding columns (a square submatrix); 2310b5df2d14SHong Zhang each processor's off-diagonal portion encompasses the remainder of the 2311b5df2d14SHong Zhang local matrix (a rectangular submatrix). 2312b5df2d14SHong Zhang 2313b5df2d14SHong Zhang The user can specify preallocated storage for the diagonal part of 2314b5df2d14SHong Zhang the local submatrix with either d_nz or d_nnz (not both). Set 23150298fd71SBarry Smith d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2316b5df2d14SHong Zhang memory allocation. Likewise, specify preallocated storage for the 2317b5df2d14SHong Zhang off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2318b5df2d14SHong Zhang 2319aa95bbe8SBarry Smith You can call MatGetInfo() to get information on how effective the preallocation was; 2320aa95bbe8SBarry Smith for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2321aa95bbe8SBarry Smith You can also run with the option -info and look for messages with the string 2322aa95bbe8SBarry Smith malloc in them to see if additional memory allocation was needed. 2323aa95bbe8SBarry Smith 2324b5df2d14SHong Zhang Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2325b5df2d14SHong Zhang the figure below we depict these three local rows and all columns (0-11). 2326b5df2d14SHong Zhang 2327b5df2d14SHong Zhang .vb 2328b5df2d14SHong Zhang 0 1 2 3 4 5 6 7 8 9 10 11 2329a4b1a0f6SJed Brown -------------------------- 2330c2fc9fa9SBarry Smith row 3 |. . . d d d o o o o o o 2331c2fc9fa9SBarry Smith row 4 |. . . d d d o o o o o o 2332c2fc9fa9SBarry Smith row 5 |. . . d d d o o o o o o 2333a4b1a0f6SJed Brown -------------------------- 2334b5df2d14SHong Zhang .ve 2335b5df2d14SHong Zhang 2336b5df2d14SHong Zhang Thus, any entries in the d locations are stored in the d (diagonal) 2337b5df2d14SHong Zhang submatrix, and any entries in the o locations are stored in the 23386d10fdaeSSatish Balay o (off-diagonal) submatrix. Note that the d matrix is stored in 23396d10fdaeSSatish Balay MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2340b5df2d14SHong Zhang 23416d10fdaeSSatish Balay Now d_nz should indicate the number of block nonzeros per row in the upper triangular 23426d10fdaeSSatish Balay plus the diagonal part of the d matrix, 2343c2fc9fa9SBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix 2344c2fc9fa9SBarry Smith 2345b5df2d14SHong Zhang In general, for PDE problems in which most nonzeros are near the diagonal, 2346b5df2d14SHong Zhang one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2347b5df2d14SHong Zhang or you will get TERRIBLE performance; see the users' manual chapter on 2348b5df2d14SHong Zhang matrices. 2349b5df2d14SHong Zhang 2350b5df2d14SHong Zhang Level: intermediate 2351b5df2d14SHong Zhang 2352ab978733SBarry Smith .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 2353b5df2d14SHong Zhang @*/ 23547087cfbeSBarry Smith PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2355b5df2d14SHong Zhang { 23564ac538c5SBarry Smith PetscErrorCode ierr; 2357b5df2d14SHong Zhang 2358b5df2d14SHong Zhang PetscFunctionBegin; 23596ba663aaSJed Brown PetscValidHeaderSpecific(B,MAT_CLASSID,1); 23606ba663aaSJed Brown PetscValidType(B,1); 23616ba663aaSJed Brown PetscValidLogicalCollectiveInt(B,bs,2); 23624ac538c5SBarry Smith ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 2363b5df2d14SHong Zhang PetscFunctionReturn(0); 2364b5df2d14SHong Zhang } 2365b5df2d14SHong Zhang 2366a30f8f8cSSatish Balay /*@C 236769b1f4b7SBarry Smith MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2368a30f8f8cSSatish Balay (block compressed row). For good matrix assembly performance 2369a30f8f8cSSatish Balay the user should preallocate the matrix storage by setting the parameters 2370a30f8f8cSSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2371a30f8f8cSSatish Balay performance can be increased by more than a factor of 50. 2372a30f8f8cSSatish Balay 2373d083f849SBarry Smith Collective 2374a30f8f8cSSatish Balay 2375a30f8f8cSSatish Balay Input Parameters: 2376a30f8f8cSSatish Balay + comm - MPI communicator 2377bb7ae925SBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2378bb7ae925SBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2379a30f8f8cSSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2380a30f8f8cSSatish Balay This value should be the same as the local size used in creating the 2381a30f8f8cSSatish Balay y vector for the matrix-vector product y = Ax. 2382a30f8f8cSSatish Balay . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2383a30f8f8cSSatish Balay This value should be the same as the local size used in creating the 2384a30f8f8cSSatish Balay x vector for the matrix-vector product y = Ax. 2385a30f8f8cSSatish Balay . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2386a30f8f8cSSatish Balay . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2387a30f8f8cSSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 2388a30f8f8cSSatish Balay submatrix (same for all local rows) 2389a30f8f8cSSatish Balay . d_nnz - array containing the number of block nonzeros in the various block rows 23906d10fdaeSSatish Balay in the upper triangular portion of the in diagonal portion of the local 23910298fd71SBarry Smith (possibly different for each block block row) or NULL. 239295742e49SBarry Smith If you plan to factor the matrix you must leave room for the diagonal entry and 239395742e49SBarry Smith set its value even if it is zero. 2394a30f8f8cSSatish Balay . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2395a30f8f8cSSatish Balay submatrix (same for all local rows). 2396a30f8f8cSSatish Balay - o_nnz - array containing the number of nonzeros in the various block rows of the 2397a30f8f8cSSatish Balay off-diagonal portion of the local submatrix (possibly different for 23980298fd71SBarry Smith each block row) or NULL. 2399a30f8f8cSSatish Balay 2400a30f8f8cSSatish Balay Output Parameter: 2401a30f8f8cSSatish Balay . A - the matrix 2402a30f8f8cSSatish Balay 2403a30f8f8cSSatish Balay Options Database Keys: 2404a2b725a8SWilliam Gropp + -mat_no_unroll - uses code that does not unroll the loops in the 2405a30f8f8cSSatish Balay block calculations (much slower) 2406a30f8f8cSSatish Balay . -mat_block_size - size of the blocks to use 2407a2b725a8SWilliam Gropp - -mat_mpi - use the parallel matrix data structures even on one processor 2408a30f8f8cSSatish Balay (defaults to using SeqBAIJ format on one processor) 2409a30f8f8cSSatish Balay 2410175b88e8SBarry Smith It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2411f6f02116SRichard Tran Mills MatXXXXSetPreallocation() paradigm instead of this routine directly. 2412175b88e8SBarry Smith [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2413175b88e8SBarry Smith 2414a30f8f8cSSatish Balay Notes: 2415d1be2dadSMatthew Knepley The number of rows and columns must be divisible by blocksize. 24166d6d819aSHong Zhang This matrix type does not support complex Hermitian operation. 2417d1be2dadSMatthew Knepley 2418a30f8f8cSSatish Balay The user MUST specify either the local or global matrix dimensions 2419a30f8f8cSSatish Balay (possibly both). 2420a30f8f8cSSatish Balay 2421a30f8f8cSSatish Balay If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2422a30f8f8cSSatish Balay than it must be used on all processors that share the object for that argument. 2423a30f8f8cSSatish Balay 242449a6f317SBarry Smith If the *_nnz parameter is given then the *_nz parameter is ignored 242549a6f317SBarry Smith 2426a30f8f8cSSatish Balay Storage Information: 2427a30f8f8cSSatish Balay For a square global matrix we define each processor's diagonal portion 2428a30f8f8cSSatish Balay to be its local rows and the corresponding columns (a square submatrix); 2429a30f8f8cSSatish Balay each processor's off-diagonal portion encompasses the remainder of the 2430a30f8f8cSSatish Balay local matrix (a rectangular submatrix). 2431a30f8f8cSSatish Balay 2432a30f8f8cSSatish Balay The user can specify preallocated storage for the diagonal part of 2433a30f8f8cSSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 24340298fd71SBarry Smith d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2435a30f8f8cSSatish Balay memory allocation. Likewise, specify preallocated storage for the 2436a30f8f8cSSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2437a30f8f8cSSatish Balay 2438a30f8f8cSSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2439a30f8f8cSSatish Balay the figure below we depict these three local rows and all columns (0-11). 2440a30f8f8cSSatish Balay 2441a30f8f8cSSatish Balay .vb 2442a30f8f8cSSatish Balay 0 1 2 3 4 5 6 7 8 9 10 11 2443a4b1a0f6SJed Brown -------------------------- 2444c2fc9fa9SBarry Smith row 3 |. . . d d d o o o o o o 2445c2fc9fa9SBarry Smith row 4 |. . . d d d o o o o o o 2446c2fc9fa9SBarry Smith row 5 |. . . d d d o o o o o o 2447a4b1a0f6SJed Brown -------------------------- 2448a30f8f8cSSatish Balay .ve 2449a30f8f8cSSatish Balay 2450a30f8f8cSSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 2451a30f8f8cSSatish Balay submatrix, and any entries in the o locations are stored in the 24526d10fdaeSSatish Balay o (off-diagonal) submatrix. Note that the d matrix is stored in 24536d10fdaeSSatish Balay MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2454a30f8f8cSSatish Balay 24556d10fdaeSSatish Balay Now d_nz should indicate the number of block nonzeros per row in the upper triangular 24566d10fdaeSSatish Balay plus the diagonal part of the d matrix, 2457a30f8f8cSSatish Balay and o_nz should indicate the number of block nonzeros per row in the o matrix. 2458a30f8f8cSSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 2459a30f8f8cSSatish Balay one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2460a30f8f8cSSatish Balay or you will get TERRIBLE performance; see the users' manual chapter on 2461a30f8f8cSSatish Balay matrices. 2462a30f8f8cSSatish Balay 2463a30f8f8cSSatish Balay Level: intermediate 2464a30f8f8cSSatish Balay 246569b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2466a30f8f8cSSatish Balay @*/ 2467a30f8f8cSSatish Balay 246869b1f4b7SBarry Smith PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2469a30f8f8cSSatish Balay { 24706849ba73SBarry Smith PetscErrorCode ierr; 24711302d50aSBarry Smith PetscMPIInt size; 2472a30f8f8cSSatish Balay 2473a30f8f8cSSatish Balay PetscFunctionBegin; 2474f69a0ea3SMatthew Knepley ierr = MatCreate(comm,A);CHKERRQ(ierr); 2475f69a0ea3SMatthew Knepley ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2476273d9f13SBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2477273d9f13SBarry Smith if (size > 1) { 2478b5df2d14SHong Zhang ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2479b5df2d14SHong Zhang ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2480273d9f13SBarry Smith } else { 2481273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2482273d9f13SBarry Smith ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2483273d9f13SBarry Smith } 2484a30f8f8cSSatish Balay PetscFunctionReturn(0); 2485a30f8f8cSSatish Balay } 2486a30f8f8cSSatish Balay 2487a30f8f8cSSatish Balay 24886849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2489a30f8f8cSSatish Balay { 2490a30f8f8cSSatish Balay Mat mat; 2491a30f8f8cSSatish Balay Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2492dfbe8321SBarry Smith PetscErrorCode ierr; 2493d0f46423SBarry Smith PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2494387bc808SHong Zhang PetscScalar *array; 2495a30f8f8cSSatish Balay 2496a30f8f8cSSatish Balay PetscFunctionBegin; 2497a30f8f8cSSatish Balay *newmat = 0; 249826fbe8dcSKarl Rupp 2499ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2500d0f46423SBarry Smith ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 25017adad957SLisandro Dalcin ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 25021e1e43feSBarry Smith ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 25031e1e43feSBarry Smith ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2504e1b6402fSHong Zhang 2505d5f3da31SBarry Smith mat->factortype = matin->factortype; 2506273d9f13SBarry Smith mat->preallocated = PETSC_TRUE; 250782327fa8SHong Zhang mat->assembled = PETSC_TRUE; 25087fff6886SHong Zhang mat->insertmode = NOT_SET_VALUES; 25097fff6886SHong Zhang 2510b5df2d14SHong Zhang a = (Mat_MPISBAIJ*)mat->data; 2511a30f8f8cSSatish Balay a->bs2 = oldmat->bs2; 2512a30f8f8cSSatish Balay a->mbs = oldmat->mbs; 2513a30f8f8cSSatish Balay a->nbs = oldmat->nbs; 2514a30f8f8cSSatish Balay a->Mbs = oldmat->Mbs; 2515a30f8f8cSSatish Balay a->Nbs = oldmat->Nbs; 2516a30f8f8cSSatish Balay 2517a30f8f8cSSatish Balay a->size = oldmat->size; 2518a30f8f8cSSatish Balay a->rank = oldmat->rank; 2519a30f8f8cSSatish Balay a->donotstash = oldmat->donotstash; 2520a30f8f8cSSatish Balay a->roworiented = oldmat->roworiented; 2521a30f8f8cSSatish Balay a->rowindices = 0; 2522a30f8f8cSSatish Balay a->rowvalues = 0; 2523a30f8f8cSSatish Balay a->getrowactive = PETSC_FALSE; 2524a30f8f8cSSatish Balay a->barray = 0; 2525899cda47SBarry Smith a->rstartbs = oldmat->rstartbs; 2526899cda47SBarry Smith a->rendbs = oldmat->rendbs; 2527899cda47SBarry Smith a->cstartbs = oldmat->cstartbs; 2528899cda47SBarry Smith a->cendbs = oldmat->cendbs; 2529a30f8f8cSSatish Balay 2530a30f8f8cSSatish Balay /* hash table stuff */ 2531a30f8f8cSSatish Balay a->ht = 0; 2532a30f8f8cSSatish Balay a->hd = 0; 2533a30f8f8cSSatish Balay a->ht_size = 0; 2534a30f8f8cSSatish Balay a->ht_flag = oldmat->ht_flag; 2535a30f8f8cSSatish Balay a->ht_fact = oldmat->ht_fact; 2536a30f8f8cSSatish Balay a->ht_total_ct = 0; 2537a30f8f8cSSatish Balay a->ht_insert_ct = 0; 2538a30f8f8cSSatish Balay 2539580bdb30SBarry Smith ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2);CHKERRQ(ierr); 2540a30f8f8cSSatish Balay if (oldmat->colmap) { 2541a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE) 2542a30f8f8cSSatish Balay ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2543a30f8f8cSSatish Balay #else 2544854ce69bSBarry Smith ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 25453bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2546580bdb30SBarry Smith ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr); 2547a30f8f8cSSatish Balay #endif 2548a30f8f8cSSatish Balay } else a->colmap = 0; 2549387bc808SHong Zhang 2550a30f8f8cSSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2551785e854fSJed Brown ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 25523bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2553580bdb30SBarry Smith ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); 2554a30f8f8cSSatish Balay } else a->garray = 0; 2555a30f8f8cSSatish Balay 2556ce94432eSBarry Smith ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2557a30f8f8cSSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 25583bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2559a30f8f8cSSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 25603bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 256182327fa8SHong Zhang 256282327fa8SHong Zhang ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 25633bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 256482327fa8SHong Zhang ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 25653bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2566387bc808SHong Zhang 2567387bc808SHong Zhang ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 25681ebc52fbSHong Zhang ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2569778a2246SBarry Smith ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2570778a2246SBarry Smith ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 25711ebc52fbSHong Zhang ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 25721ebc52fbSHong Zhang ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2573778a2246SBarry Smith ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 25741ebc52fbSHong Zhang ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 25753bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 25763bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 25773bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 25783bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 25793bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2580387bc808SHong Zhang 2581387bc808SHong Zhang /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2582387bc808SHong Zhang ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2583387bc808SHong Zhang a->sMvctx = oldmat->sMvctx; 25843bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 258582327fa8SHong Zhang 2586a30f8f8cSSatish Balay ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 25873bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2588a30f8f8cSSatish Balay ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 25893bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2590140e18c1SBarry Smith ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2591a30f8f8cSSatish Balay *newmat = mat; 2592a30f8f8cSSatish Balay PetscFunctionReturn(0); 2593a30f8f8cSSatish Balay } 2594a30f8f8cSSatish Balay 2595618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */ 2596618cc2edSLisandro Dalcin #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary 2597618cc2edSLisandro Dalcin 2598618cc2edSLisandro Dalcin PetscErrorCode MatLoad_MPISBAIJ(Mat mat,PetscViewer viewer) 259995936485SShri Abhyankar { 260095936485SShri Abhyankar PetscErrorCode ierr; 26017f489da9SVaclav Hapla PetscBool isbinary; 260295936485SShri Abhyankar 260395936485SShri Abhyankar PetscFunctionBegin; 26047f489da9SVaclav Hapla ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2605618cc2edSLisandro Dalcin if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 2606618cc2edSLisandro Dalcin ierr = MatLoad_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 260795936485SShri Abhyankar PetscFunctionReturn(0); 260895936485SShri Abhyankar } 260995936485SShri Abhyankar 2610dcf5cc72SBarry Smith /*XXXXX@ 2611a30f8f8cSSatish Balay MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2612a30f8f8cSSatish Balay 2613a30f8f8cSSatish Balay Input Parameters: 2614a30f8f8cSSatish Balay . mat - the matrix 2615a30f8f8cSSatish Balay . fact - factor 2616a30f8f8cSSatish Balay 2617c5eb9154SBarry Smith Not Collective on Mat, each process can have a different hash factor 2618a30f8f8cSSatish Balay 2619a30f8f8cSSatish Balay Level: advanced 2620a30f8f8cSSatish Balay 2621a30f8f8cSSatish Balay Notes: 2622a30f8f8cSSatish Balay This can also be set by the command line option: -mat_use_hash_table fact 2623a30f8f8cSSatish Balay 2624a30f8f8cSSatish Balay .seealso: MatSetOption() 2625dcf5cc72SBarry Smith @XXXXX*/ 2626dcf5cc72SBarry Smith 262724d5174aSHong Zhang 2628985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 262924d5174aSHong Zhang { 263024d5174aSHong Zhang Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2631f4c0e9e4SHong Zhang Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2632ca54ac64SHong Zhang PetscReal atmp; 263387828ca2SBarry Smith PetscReal *work,*svalues,*rvalues; 2634dfbe8321SBarry Smith PetscErrorCode ierr; 26351302d50aSBarry Smith PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 26361302d50aSBarry Smith PetscMPIInt rank,size; 26371302d50aSBarry Smith PetscInt *rowners_bs,dest,count,source; 263887828ca2SBarry Smith PetscScalar *va; 26398a1c53f2SBarry Smith MatScalar *ba; 2640f4c0e9e4SHong Zhang MPI_Status stat; 264124d5174aSHong Zhang 264224d5174aSHong Zhang PetscFunctionBegin; 2643e32f2f54SBarry Smith if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 26440298fd71SBarry Smith ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 26451ebc52fbSHong Zhang ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2646f4c0e9e4SHong Zhang 2647ce94432eSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2648ce94432eSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2649f4c0e9e4SHong Zhang 2650d0f46423SBarry Smith bs = A->rmap->bs; 2651f4c0e9e4SHong Zhang mbs = a->mbs; 2652f4c0e9e4SHong Zhang Mbs = a->Mbs; 2653f4c0e9e4SHong Zhang ba = b->a; 2654f4c0e9e4SHong Zhang bi = b->i; 2655f4c0e9e4SHong Zhang bj = b->j; 2656f4c0e9e4SHong Zhang 2657f4c0e9e4SHong Zhang /* find ownerships */ 2658d0f46423SBarry Smith rowners_bs = A->rmap->range; 2659f4c0e9e4SHong Zhang 2660f4c0e9e4SHong Zhang /* each proc creates an array to be distributed */ 2661580bdb30SBarry Smith ierr = PetscCalloc1(bs*Mbs,&work);CHKERRQ(ierr); 2662f4c0e9e4SHong Zhang 2663f4c0e9e4SHong Zhang /* row_max for B */ 2664b8475685SHong Zhang if (rank != size-1) { 2665f4c0e9e4SHong Zhang for (i=0; i<mbs; i++) { 2666f4c0e9e4SHong Zhang ncols = bi[1] - bi[0]; bi++; 2667f4c0e9e4SHong Zhang brow = bs*i; 2668f4c0e9e4SHong Zhang for (j=0; j<ncols; j++) { 2669f4c0e9e4SHong Zhang bcol = bs*(*bj); 2670f4c0e9e4SHong Zhang for (kcol=0; kcol<bs; kcol++) { 2671ca54ac64SHong Zhang col = bcol + kcol; /* local col index */ 267204d41228SHong Zhang col += rowners_bs[rank+1]; /* global col index */ 2673f4c0e9e4SHong Zhang for (krow=0; krow<bs; krow++) { 2674f4c0e9e4SHong Zhang atmp = PetscAbsScalar(*ba); ba++; 2675ca54ac64SHong Zhang row = brow + krow; /* local row index */ 2676ca54ac64SHong Zhang if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2677f4c0e9e4SHong Zhang if (work[col] < atmp) work[col] = atmp; 2678f4c0e9e4SHong Zhang } 2679f4c0e9e4SHong Zhang } 2680f4c0e9e4SHong Zhang bj++; 2681f4c0e9e4SHong Zhang } 2682f4c0e9e4SHong Zhang } 2683f4c0e9e4SHong Zhang 2684f4c0e9e4SHong Zhang /* send values to its owners */ 2685f4c0e9e4SHong Zhang for (dest=rank+1; dest<size; dest++) { 2686f4c0e9e4SHong Zhang svalues = work + rowners_bs[dest]; 2687ca54ac64SHong Zhang count = rowners_bs[dest+1]-rowners_bs[dest]; 2688ce94432eSBarry Smith ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2689ca54ac64SHong Zhang } 2690f4c0e9e4SHong Zhang } 2691f4c0e9e4SHong Zhang 2692f4c0e9e4SHong Zhang /* receive values */ 2693ca54ac64SHong Zhang if (rank) { 2694f4c0e9e4SHong Zhang rvalues = work; 2695ca54ac64SHong Zhang count = rowners_bs[rank+1]-rowners_bs[rank]; 2696f4c0e9e4SHong Zhang for (source=0; source<rank; source++) { 2697ce94432eSBarry Smith ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 2698f4c0e9e4SHong Zhang /* process values */ 2699f4c0e9e4SHong Zhang for (i=0; i<count; i++) { 2700ca54ac64SHong Zhang if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2701f4c0e9e4SHong Zhang } 2702f4c0e9e4SHong Zhang } 2703ca54ac64SHong Zhang } 2704f4c0e9e4SHong Zhang 27051ebc52fbSHong Zhang ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2706ac355199SBarry Smith ierr = PetscFree(work);CHKERRQ(ierr); 270724d5174aSHong Zhang PetscFunctionReturn(0); 270824d5174aSHong Zhang } 27092798e883SHong Zhang 271041f059aeSBarry Smith PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 27112798e883SHong Zhang { 27122798e883SHong Zhang Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2713dfbe8321SBarry Smith PetscErrorCode ierr; 2714d0f46423SBarry Smith PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 27153649974fSBarry Smith PetscScalar *x,*ptr,*from; 2716ffe4fb16SHong Zhang Vec bb1; 27173649974fSBarry Smith const PetscScalar *b; 2718ffe4fb16SHong Zhang 2719ffe4fb16SHong Zhang PetscFunctionBegin; 2720e32f2f54SBarry Smith if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2721e32f2f54SBarry Smith if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2722ffe4fb16SHong Zhang 2723a2b30743SBarry Smith if (flag == SOR_APPLY_UPPER) { 272441f059aeSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2725a2b30743SBarry Smith PetscFunctionReturn(0); 2726a2b30743SBarry Smith } 2727a2b30743SBarry Smith 2728ffe4fb16SHong Zhang if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2729ffe4fb16SHong Zhang if (flag & SOR_ZERO_INITIAL_GUESS) { 273041f059aeSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2731ffe4fb16SHong Zhang its--; 2732ffe4fb16SHong Zhang } 2733ffe4fb16SHong Zhang 2734ffe4fb16SHong Zhang ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2735ffe4fb16SHong Zhang while (its--) { 2736ffe4fb16SHong Zhang 2737ffe4fb16SHong Zhang /* lower triangular part: slvec0b = - B^T*xx */ 2738ffe4fb16SHong Zhang ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2739ffe4fb16SHong Zhang 2740ffe4fb16SHong Zhang /* copy xx into slvec0a */ 27411ebc52fbSHong Zhang ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 27421ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2743580bdb30SBarry Smith ierr = PetscArraycpy(ptr,x,bs*mbs);CHKERRQ(ierr); 27441ebc52fbSHong Zhang ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2745ffe4fb16SHong Zhang 2746efb30889SBarry Smith ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2747ffe4fb16SHong Zhang 2748ffe4fb16SHong Zhang /* copy bb into slvec1a */ 27491ebc52fbSHong Zhang ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 27503649974fSBarry Smith ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2751580bdb30SBarry Smith ierr = PetscArraycpy(ptr,b,bs*mbs);CHKERRQ(ierr); 27521ebc52fbSHong Zhang ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2753ffe4fb16SHong Zhang 2754ffe4fb16SHong Zhang /* set slvec1b = 0 */ 2755fa22f6d0SBarry Smith ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2756ffe4fb16SHong Zhang 2757ca9f406cSSatish Balay ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 27581ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2759a8b09249SBarry Smith ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2760ca9f406cSSatish Balay ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2761ffe4fb16SHong Zhang 2762ffe4fb16SHong Zhang /* upper triangular part: bb1 = bb1 - B*x */ 2763ffe4fb16SHong Zhang ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2764ffe4fb16SHong Zhang 2765ffe4fb16SHong Zhang /* local diagonal sweep */ 276641f059aeSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2767ffe4fb16SHong Zhang } 27686bf464f9SBarry Smith ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2769fa22f6d0SBarry Smith } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 277041f059aeSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2771fa22f6d0SBarry Smith } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 277241f059aeSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2773fa22f6d0SBarry Smith } else if (flag & SOR_EISENSTAT) { 2774fa22f6d0SBarry Smith Vec xx1; 2775ace3abfcSBarry Smith PetscBool hasop; 277620f1ed55SBarry Smith const PetscScalar *diag; 2777887ee2caSBarry Smith PetscScalar *sl,scale = (omega - 2.0)/omega; 277820f1ed55SBarry Smith PetscInt i,n; 2779fa22f6d0SBarry Smith 2780fa22f6d0SBarry Smith if (!mat->xx1) { 2781fa22f6d0SBarry Smith ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2782fa22f6d0SBarry Smith ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2783fa22f6d0SBarry Smith } 2784fa22f6d0SBarry Smith xx1 = mat->xx1; 2785fa22f6d0SBarry Smith bb1 = mat->bb1; 2786fa22f6d0SBarry Smith 278741f059aeSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2788fa22f6d0SBarry Smith 2789fa22f6d0SBarry Smith if (!mat->diag) { 2790effcda25SBarry Smith /* this is wrong for same matrix with new nonzero values */ 27912a7a6963SBarry Smith ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 2792fa22f6d0SBarry Smith ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2793fa22f6d0SBarry Smith } 2794fa22f6d0SBarry Smith ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2795fa22f6d0SBarry Smith 2796fa22f6d0SBarry Smith if (hasop) { 2797fa22f6d0SBarry Smith ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2798887ee2caSBarry Smith ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 279920f1ed55SBarry Smith } else { 280020f1ed55SBarry Smith /* 280120f1ed55SBarry Smith These two lines are replaced by code that may be a bit faster for a good compiler 280220f1ed55SBarry Smith ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2803887ee2caSBarry Smith ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 280420f1ed55SBarry Smith */ 280520f1ed55SBarry Smith ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 28063649974fSBarry Smith ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 28073649974fSBarry Smith ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 280820f1ed55SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 280920f1ed55SBarry Smith ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2810887ee2caSBarry Smith if (omega == 1.0) { 281126fbe8dcSKarl Rupp for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 281220f1ed55SBarry Smith ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2813887ee2caSBarry Smith } else { 281426fbe8dcSKarl Rupp for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 2815887ee2caSBarry Smith ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2816887ee2caSBarry Smith } 281720f1ed55SBarry Smith ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 28183649974fSBarry Smith ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 28193649974fSBarry Smith ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 282020f1ed55SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 282120f1ed55SBarry Smith } 2822fa22f6d0SBarry Smith 2823fa22f6d0SBarry Smith /* multiply off-diagonal portion of matrix */ 2824fa22f6d0SBarry Smith ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2825fa22f6d0SBarry Smith ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2826fa22f6d0SBarry Smith ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2827fa22f6d0SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2828580bdb30SBarry Smith ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr); 2829fa22f6d0SBarry Smith ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2830fa22f6d0SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2831fa22f6d0SBarry Smith ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2832fa22f6d0SBarry Smith ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2833effcda25SBarry Smith ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2834fa22f6d0SBarry Smith 2835fa22f6d0SBarry Smith /* local sweep */ 283641f059aeSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2837fa22f6d0SBarry Smith ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2838f23aa3ddSBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2839ffe4fb16SHong Zhang PetscFunctionReturn(0); 2840ffe4fb16SHong Zhang } 2841ffe4fb16SHong Zhang 284241f059aeSBarry Smith PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2843ffe4fb16SHong Zhang { 2844ffe4fb16SHong Zhang Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2845dfbe8321SBarry Smith PetscErrorCode ierr; 28462798e883SHong Zhang Vec lvec1,bb1; 28472798e883SHong Zhang 28482798e883SHong Zhang PetscFunctionBegin; 2849e32f2f54SBarry Smith if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2850e32f2f54SBarry Smith if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 28512798e883SHong Zhang 2852c14dc6b6SHong Zhang if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 28532798e883SHong Zhang if (flag & SOR_ZERO_INITIAL_GUESS) { 285441f059aeSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 28552798e883SHong Zhang its--; 28562798e883SHong Zhang } 28572798e883SHong Zhang 28582798e883SHong Zhang ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 28592798e883SHong Zhang ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 28602798e883SHong Zhang while (its--) { 2861ca9f406cSSatish Balay ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 28622798e883SHong Zhang 28632798e883SHong Zhang /* lower diagonal part: bb1 = bb - B^T*xx */ 28642798e883SHong Zhang ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2865efb30889SBarry Smith ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 28662798e883SHong Zhang 2867ca9f406cSSatish Balay ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 28682798e883SHong Zhang ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2869ca9f406cSSatish Balay ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 28702798e883SHong Zhang 28712798e883SHong Zhang /* upper diagonal part: bb1 = bb1 - B*x */ 2872efb30889SBarry Smith ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 28732798e883SHong Zhang ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 28742798e883SHong Zhang 2875ca9f406cSSatish Balay ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 28762798e883SHong Zhang 2877c14dc6b6SHong Zhang /* diagonal sweep */ 287841f059aeSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 28792798e883SHong Zhang } 28806bf464f9SBarry Smith ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 28816bf464f9SBarry Smith ierr = VecDestroy(&bb1);CHKERRQ(ierr); 28826bf464f9SBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 28832798e883SHong Zhang PetscFunctionReturn(0); 28842798e883SHong Zhang } 28852798e883SHong Zhang 2886dfb205c3SBarry Smith /*@ 2887dfb205c3SBarry Smith MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard 2888dfb205c3SBarry Smith CSR format the local rows. 2889dfb205c3SBarry Smith 2890d083f849SBarry Smith Collective 2891dfb205c3SBarry Smith 2892dfb205c3SBarry Smith Input Parameters: 2893dfb205c3SBarry Smith + comm - MPI communicator 2894dfb205c3SBarry Smith . bs - the block size, only a block size of 1 is supported 2895dfb205c3SBarry Smith . m - number of local rows (Cannot be PETSC_DECIDE) 2896dfb205c3SBarry Smith . n - This value should be the same as the local size used in creating the 2897dfb205c3SBarry Smith x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2898dfb205c3SBarry Smith calculated if N is given) For square matrices n is almost always m. 2899dfb205c3SBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2900dfb205c3SBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2901483a2f95SBarry Smith . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix 2902dfb205c3SBarry Smith . j - column indices 2903dfb205c3SBarry Smith - a - matrix values 2904dfb205c3SBarry Smith 2905dfb205c3SBarry Smith Output Parameter: 2906dfb205c3SBarry Smith . mat - the matrix 2907dfb205c3SBarry Smith 2908dfb205c3SBarry Smith Level: intermediate 2909dfb205c3SBarry Smith 2910dfb205c3SBarry Smith Notes: 2911dfb205c3SBarry Smith The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2912dfb205c3SBarry Smith thus you CANNOT change the matrix entries by changing the values of a[] after you have 2913dfb205c3SBarry Smith called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2914dfb205c3SBarry Smith 2915dfb205c3SBarry Smith The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 2916dfb205c3SBarry Smith 2917dfb205c3SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 291869b1f4b7SBarry Smith MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 2919dfb205c3SBarry Smith @*/ 29207087cfbeSBarry Smith PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2921dfb205c3SBarry Smith { 2922dfb205c3SBarry Smith PetscErrorCode ierr; 2923dfb205c3SBarry Smith 2924dfb205c3SBarry Smith 2925dfb205c3SBarry Smith PetscFunctionBegin; 2926f23aa3ddSBarry Smith if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2927dfb205c3SBarry Smith if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2928dfb205c3SBarry Smith ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2929dfb205c3SBarry Smith ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 2930dfb205c3SBarry Smith ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 2931dfb205c3SBarry Smith ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 2932dfb205c3SBarry Smith PetscFunctionReturn(0); 2933dfb205c3SBarry Smith } 2934dfb205c3SBarry Smith 2935dfb205c3SBarry Smith 2936dfb205c3SBarry Smith /*@C 2937664954b6SBarry Smith MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values 2938dfb205c3SBarry Smith 2939d083f849SBarry Smith Collective 2940dfb205c3SBarry Smith 2941dfb205c3SBarry Smith Input Parameters: 29421c4f3114SJed Brown + B - the matrix 2943dfb205c3SBarry Smith . bs - the block size 2944dfb205c3SBarry Smith . i - the indices into j for the start of each local row (starts with zero) 2945dfb205c3SBarry Smith . j - the column indices for each local row (starts with zero) these must be sorted for each row 2946dfb205c3SBarry Smith - v - optional values in the matrix 2947dfb205c3SBarry Smith 2948664954b6SBarry Smith Level: advanced 2949664954b6SBarry Smith 2950664954b6SBarry Smith Notes: 29510cd7f59aSBarry Smith Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries 29520cd7f59aSBarry Smith and usually the numerical values as well 29530cd7f59aSBarry Smith 295450c5228eSBarry Smith Any entries below the diagonal are ignored 2955dfb205c3SBarry Smith 295669b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 2957dfb205c3SBarry Smith @*/ 29587087cfbeSBarry Smith PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2959dfb205c3SBarry Smith { 29604ac538c5SBarry Smith PetscErrorCode ierr; 2961dfb205c3SBarry Smith 2962dfb205c3SBarry Smith PetscFunctionBegin; 29634ac538c5SBarry Smith ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2964dfb205c3SBarry Smith PetscFunctionReturn(0); 2965dfb205c3SBarry Smith } 2966dfb205c3SBarry Smith 296710c56fdeSHong Zhang PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 29684dcd73b1SHong Zhang { 29694dcd73b1SHong Zhang PetscErrorCode ierr; 297010c56fdeSHong Zhang PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 297110c56fdeSHong Zhang PetscInt *indx; 297210c56fdeSHong Zhang PetscScalar *values; 2973dfb205c3SBarry Smith 29744dcd73b1SHong Zhang PetscFunctionBegin; 29754dcd73b1SHong Zhang ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 297610c56fdeSHong Zhang if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 297710c56fdeSHong Zhang Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data; 2978de25e9cbSPierre Jolivet PetscInt *dnz,*onz,mbs,Nbs,nbs; 297910c56fdeSHong Zhang PetscInt *bindx,rmax=a->rmax,j; 2980de25e9cbSPierre Jolivet PetscMPIInt rank,size; 29814dcd73b1SHong Zhang 298210c56fdeSHong Zhang ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 298310c56fdeSHong Zhang mbs = m/bs; Nbs = N/cbs; 298410c56fdeSHong Zhang if (n == PETSC_DECIDE) { 2985da91a574SPierre Jolivet ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N); 298610c56fdeSHong Zhang } 2987da91a574SPierre Jolivet nbs = n/cbs; 29884dcd73b1SHong Zhang 29894dcd73b1SHong Zhang ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 2990de25e9cbSPierre Jolivet ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */ 2991de25e9cbSPierre Jolivet 2992de25e9cbSPierre Jolivet ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2993de25e9cbSPierre Jolivet ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr); 2994de25e9cbSPierre Jolivet if (rank == size-1) { 2995de25e9cbSPierre Jolivet /* Check sum(nbs) = Nbs */ 2996de25e9cbSPierre Jolivet if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs); 2997de25e9cbSPierre Jolivet } 2998de25e9cbSPierre Jolivet 2999de25e9cbSPierre Jolivet rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */ 3000e491d569SHong Zhang ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 300110c56fdeSHong Zhang for (i=0; i<mbs; i++) { 30024dcd73b1SHong Zhang ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 30034dcd73b1SHong Zhang nnz = nnz/bs; 30044dcd73b1SHong Zhang for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 30054dcd73b1SHong Zhang ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 30064dcd73b1SHong Zhang ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 30074dcd73b1SHong Zhang } 3008e491d569SHong Zhang ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 30094dcd73b1SHong Zhang ierr = PetscFree(bindx);CHKERRQ(ierr); 30104dcd73b1SHong Zhang 30114dcd73b1SHong Zhang ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3012de25e9cbSPierre Jolivet ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 30134dcd73b1SHong Zhang ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3014ce94fc41SPierre Jolivet ierr = MatSetType(*outmat,MATSBAIJ);CHKERRQ(ierr); 3015ce94fc41SPierre Jolivet ierr = MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr); 30164dcd73b1SHong Zhang ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 30174dcd73b1SHong Zhang ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 30184dcd73b1SHong Zhang } 30194dcd73b1SHong Zhang 302010c56fdeSHong Zhang /* numeric phase */ 30214dcd73b1SHong Zhang ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 302210c56fdeSHong Zhang ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 30234dcd73b1SHong Zhang 3024e491d569SHong Zhang ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 30254dcd73b1SHong Zhang for (i=0; i<m; i++) { 30264dcd73b1SHong Zhang ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 30274dcd73b1SHong Zhang Ii = i + rstart; 302810c56fdeSHong Zhang ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 30294dcd73b1SHong Zhang ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 30304dcd73b1SHong Zhang } 3031e491d569SHong Zhang ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 303210c56fdeSHong Zhang ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 303310c56fdeSHong Zhang ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 30344dcd73b1SHong Zhang PetscFunctionReturn(0); 30354dcd73b1SHong Zhang } 3036